1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_arm64.h"
18 
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "art_method.h"
21 #include "code_generator_arm64.h"
22 #include "common_arm64.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "heap_poisoning.h"
25 #include "intrinsics.h"
26 #include "lock_word.h"
27 #include "mirror/array-inl.h"
28 #include "mirror/object_array-inl.h"
29 #include "mirror/reference.h"
30 #include "mirror/string-inl.h"
31 #include "scoped_thread_state_change-inl.h"
32 #include "thread-current-inl.h"
33 #include "utils/arm64/assembler_arm64.h"
34 
35 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
36 
37 // TODO(VIXL): Make VIXL compile with -Wshadow.
38 #pragma GCC diagnostic push
39 #pragma GCC diagnostic ignored "-Wshadow"
40 #include "aarch64/disasm-aarch64.h"
41 #include "aarch64/macro-assembler-aarch64.h"
42 #pragma GCC diagnostic pop
43 
44 namespace art {
45 
46 namespace arm64 {
47 
48 using helpers::DRegisterFrom;
49 using helpers::FPRegisterFrom;
50 using helpers::HeapOperand;
51 using helpers::LocationFrom;
52 using helpers::OperandFrom;
53 using helpers::RegisterFrom;
54 using helpers::SRegisterFrom;
55 using helpers::WRegisterFrom;
56 using helpers::XRegisterFrom;
57 using helpers::InputRegisterAt;
58 using helpers::OutputRegister;
59 
60 namespace {
61 
62 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
63   return MemOperand(XRegisterFrom(location), offset);
64 }
65 
66 }  // namespace
67 
68 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
69   return codegen_->GetVIXLAssembler();
70 }
71 
72 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
73   return codegen_->GetGraph()->GetAllocator();
74 }
75 
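// Convention used below: `__` expands to the relevant VIXL macro assembler, so
// assembly mnemonics can be written directly, e.g. `__ Mov(...)`.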
76 #define __ codegen->GetVIXLAssembler()->
77 
78 static void MoveFromReturnRegister(Location trg,
79                                    DataType::Type type,
80                                    CodeGeneratorARM64* codegen) {
81   if (!trg.IsValid()) {
82     DCHECK(type == DataType::Type::kVoid);
83     return;
84   }
85 
86   DCHECK_NE(type, DataType::Type::kVoid);
87 
88   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
89     Register trg_reg = RegisterFrom(trg, type);
90     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
91     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
92   } else {
93     FPRegister trg_reg = FPRegisterFrom(trg, type);
94     FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
95     __ Fmov(trg_reg, res_reg);
96   }
97 }
98 
99 static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
100   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
101   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
102 }
103 
104 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
105 // call. This will copy the arguments into the positions for a regular call.
106 //
107 // Note: The actual parameters are required to be in the locations given by the invoke's location
108 //       summary. If an intrinsic modifies those locations before a slowpath call, they must be
109 //       restored!
110 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
111  public:
112   explicit IntrinsicSlowPathARM64(HInvoke* invoke)
113       : SlowPathCodeARM64(invoke), invoke_(invoke) { }
114 
115   void EmitNativeCode(CodeGenerator* codegen_in) override {
116     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
117     __ Bind(GetEntryLabel());
118 
119     SaveLiveRegisters(codegen, invoke_->GetLocations());
120 
121     MoveArguments(invoke_, codegen);
122 
123     {
124       // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
125       // are no pools emitted.
126       vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
127       if (invoke_->IsInvokeStaticOrDirect()) {
128         codegen->GenerateStaticOrDirectCall(
129             invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
130       } else {
131         codegen->GenerateVirtualCall(
132             invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
133       }
134     }
135 
136     // Copy the result back to the expected output.
137     Location out = invoke_->GetLocations()->Out();
138     if (out.IsValid()) {
139       DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
140       DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
141       MoveFromReturnRegister(out, invoke_->GetType(), codegen);
142     }
143 
144     RestoreLiveRegisters(codegen, invoke_->GetLocations());
145     __ B(GetExitLabel());
146   }
147 
148   const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; }
149 
150  private:
151   // The instruction where this slow path is happening.
152   HInvoke* const invoke_;
153 
154   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
155 };
156 
157 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
158 class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
159  public:
160   ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
161       : SlowPathCodeARM64(instruction), tmp_(tmp) {
162     DCHECK(kEmitCompilerReadBarrier);
163     DCHECK(kUseBakerReadBarrier);
164   }
165 
166   void EmitNativeCode(CodeGenerator* codegen_in) override {
167     CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
168     LocationSummary* locations = instruction_->GetLocations();
169     DCHECK(locations->CanCall());
170     DCHECK(instruction_->IsInvokeStaticOrDirect())
171         << "Unexpected instruction in read barrier arraycopy slow path: "
172         << instruction_->DebugName();
173     DCHECK(instruction_->GetLocations()->Intrinsified());
174     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
175 
176     const int32_t element_size = DataType::Size(DataType::Type::kReference);
177 
178     Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
179     Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
180     Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
181     Register tmp_reg = WRegisterFrom(tmp_);
182 
183     __ Bind(GetEntryLabel());
184     vixl::aarch64::Label slow_copy_loop;
185     __ Bind(&slow_copy_loop);
186     __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
187     codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
188     // TODO: Inline the mark bit check before calling the runtime?
189     // tmp_reg = ReadBarrier::Mark(tmp_reg);
190     // No need to save live registers; it's taken care of by the
191     // entrypoint. Also, there is no need to update the stack mask,
192     // as this runtime call will not trigger a garbage collection.
193     // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
194     // explanations.)
195     DCHECK_NE(tmp_.reg(), LR);
196     DCHECK_NE(tmp_.reg(), WSP);
197     DCHECK_NE(tmp_.reg(), WZR);
198     // IP0 is used internally by the ReadBarrierMarkRegX entry point
199     // as a temporary (and not preserved).  It thus cannot be used by
200     // any live register in this slow path.
201     DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
202     DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
203     DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
204     DCHECK_NE(tmp_.reg(), IP0);
205     DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
206     // TODO: Load the entrypoint once before the loop, instead of
207     // loading it at every iteration.
208     int32_t entry_point_offset =
209         Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
210     // This runtime call does not require a stack map.
211     codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
212     codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
213     __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
214     __ Cmp(src_curr_addr, src_stop_addr);
215     __ B(&slow_copy_loop, ne);
216     __ B(GetExitLabel());
217   }
218 
219   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
220 
221  private:
222   Location tmp_;
223 
224   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
225 };
226 #undef __
227 
228 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
229   Dispatch(invoke);
230   LocationSummary* res = invoke->GetLocations();
231   if (res == nullptr) {
232     return false;
233   }
234   return res->Intrinsified();
235 }
236 
237 #define __ masm->
238 
239 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
240   LocationSummary* locations =
241       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
242   locations->SetInAt(0, Location::RequiresFpuRegister());
243   locations->SetOut(Location::RequiresRegister());
244 }
245 
246 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
247   LocationSummary* locations =
248       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
249   locations->SetInAt(0, Location::RequiresRegister());
250   locations->SetOut(Location::RequiresFpuRegister());
251 }
252 
253 static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
254   Location input = locations->InAt(0);
255   Location output = locations->Out();
256   __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
257           is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
258 }
259 
260 static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
261   Location input = locations->InAt(0);
262   Location output = locations->Out();
263   __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
264           is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
265 }
266 
267 void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
268   CreateFPToIntLocations(allocator_, invoke);
269 }
270 void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
271   CreateIntToFPLocations(allocator_, invoke);
272 }
273 
274 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
275   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
276 }
277 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
278   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
279 }
280 
281 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
282   CreateFPToIntLocations(allocator_, invoke);
283 }
284 void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
285   CreateIntToFPLocations(allocator_, invoke);
286 }
287 
288 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
289   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
290 }
291 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
292   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
293 }
294 
295 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296   LocationSummary* locations =
297       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
298   locations->SetInAt(0, Location::RequiresRegister());
299   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
300 }
301 
302 static void GenReverseBytes(LocationSummary* locations,
303                             DataType::Type type,
304                             MacroAssembler* masm) {
305   Location in = locations->InAt(0);
306   Location out = locations->Out();
307 
308   switch (type) {
309     case DataType::Type::kInt16:
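      // REV16 byte-swaps each halfword; sign-extend the low halfword to produce the short result.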
310       __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
311       __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
312       break;
313     case DataType::Type::kInt32:
314     case DataType::Type::kInt64:
315       __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
316       break;
317     default:
318       LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
319       UNREACHABLE();
320   }
321 }
322 
323 void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
324   CreateIntToIntLocations(allocator_, invoke);
325 }
326 
327 void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
328   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
329 }
330 
331 void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
332   CreateIntToIntLocations(allocator_, invoke);
333 }
334 
335 void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
336   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
337 }
338 
339 void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
340   CreateIntToIntLocations(allocator_, invoke);
341 }
342 
343 void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
344   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
345 }
346 
347 static void GenNumberOfLeadingZeros(LocationSummary* locations,
348                                     DataType::Type type,
349                                     MacroAssembler* masm) {
350   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
351 
352   Location in = locations->InAt(0);
353   Location out = locations->Out();
354 
355   __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
356 }
357 
358 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
359   CreateIntToIntLocations(allocator_, invoke);
360 }
361 
362 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
363   GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
364 }
365 
366 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
367   CreateIntToIntLocations(allocator_, invoke);
368 }
369 
370 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
371   GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
372 }
373 
374 static void GenNumberOfTrailingZeros(LocationSummary* locations,
375                                      DataType::Type type,
376                                      MacroAssembler* masm) {
377   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
378 
379   Location in = locations->InAt(0);
380   Location out = locations->Out();
381 
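  // Count trailing zeros as CLZ of the bit-reversed input.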
382   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
383   __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
384 }
385 
386 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
387   CreateIntToIntLocations(allocator_, invoke);
388 }
389 
390 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
391   GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
392 }
393 
394 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
395   CreateIntToIntLocations(allocator_, invoke);
396 }
397 
398 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
399   GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
400 }
401 
402 static void GenReverse(LocationSummary* locations,
403                        DataType::Type type,
404                        MacroAssembler* masm) {
405   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
406 
407   Location in = locations->InAt(0);
408   Location out = locations->Out();
409 
410   __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
411 }
412 
413 void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
414   CreateIntToIntLocations(allocator_, invoke);
415 }
416 
417 void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
418   GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
419 }
420 
421 void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
422   CreateIntToIntLocations(allocator_, invoke);
423 }
424 
425 void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
426   GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
427 }
428 
429 static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
430   DCHECK(DataType::IsIntOrLongType(type)) << type;
431   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
432   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
433 
434   UseScratchRegisterScope temps(masm);
435 
436   Register src = InputRegisterAt(instr, 0);
437   Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
438   FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();
439 
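  // Population count via SIMD: CNT counts the set bits in each byte of the vector
  // register, ADDV sums those byte counts, and FMOV moves the result back.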
440   __ Fmov(fpr, src);
441   __ Cnt(fpr.V8B(), fpr.V8B());
442   __ Addv(fpr.B(), fpr.V8B());
443   __ Fmov(dst, fpr);
444 }
445 
446 void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
447   CreateIntToIntLocations(allocator_, invoke);
448 }
449 
450 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
451   GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
452 }
453 
454 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
455   CreateIntToIntLocations(allocator_, invoke);
456 }
457 
458 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
459   GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
460 }
461 
462 static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
463   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
464 
465   UseScratchRegisterScope temps(masm);
466 
467   Register src = InputRegisterAt(invoke, 0);
468   Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
469   Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
470   size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
471   size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;
472 
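  // Result is (1 << high_bit) >> clz(src). When src == 0, CLZ returns the register
  // width, so the BIC below clears the only set bit in dst and the result is 0.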
473   __ Clz(temp, src);
474   __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
475   __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
476   __ Lsr(dst, dst, temp);
477 }
478 
479 void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
480   CreateIntToIntLocations(allocator_, invoke);
481 }
482 
483 void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
484   GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
485 }
486 
487 void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
488   CreateIntToIntLocations(allocator_, invoke);
489 }
490 
491 void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
492   GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
493 }
494 
495 static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
496   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
497 
498   UseScratchRegisterScope temps(masm);
499 
500   Register src = InputRegisterAt(invoke, 0);
501   Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
502   Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
503 
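  // Isolate the lowest set bit: dst = src & -src (0 when src is 0).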
504   __ Neg(temp, src);
505   __ And(dst, temp, src);
506 }
507 
508 void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
509   CreateIntToIntLocations(allocator_, invoke);
510 }
511 
512 void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
513   GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
514 }
515 
516 void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
517   CreateIntToIntLocations(allocator_, invoke);
518 }
519 
520 void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
521   GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
522 }
523 
524 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
525   LocationSummary* locations =
526       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
527   locations->SetInAt(0, Location::RequiresFpuRegister());
528   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
529 }
530 
531 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
532   CreateFPToFPLocations(allocator_, invoke);
533 }
534 
535 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
536   LocationSummary* locations = invoke->GetLocations();
537   MacroAssembler* masm = GetVIXLAssembler();
538   __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
539 }
540 
541 void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
542   CreateFPToFPLocations(allocator_, invoke);
543 }
544 
545 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
546   LocationSummary* locations = invoke->GetLocations();
547   MacroAssembler* masm = GetVIXLAssembler();
548   __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
549 }
550 
551 void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
552   CreateFPToFPLocations(allocator_, invoke);
553 }
554 
555 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
556   LocationSummary* locations = invoke->GetLocations();
557   MacroAssembler* masm = GetVIXLAssembler();
558   __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
559 }
560 
561 void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
562   CreateFPToFPLocations(allocator_, invoke);
563 }
564 
565 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
566   LocationSummary* locations = invoke->GetLocations();
567   MacroAssembler* masm = GetVIXLAssembler();
568   __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
569 }
570 
571 static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
572   LocationSummary* locations =
573       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
574   locations->SetInAt(0, Location::RequiresFpuRegister());
575   locations->SetOut(Location::RequiresRegister());
576   locations->AddTemp(Location::RequiresFpuRegister());
577 }
578 
579 static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
580   // Java 8 API definition for Math.round():
581   // Return the closest long or int to the argument, with ties rounding to positive infinity.
582   //
583   // There is no single instruction in ARMv8 that can support the above definition.
584  // We choose to use FCVTAS here because it has the closest semantics.
585  // FCVTAS performs rounding to nearest integer, ties away from zero.
586  // For most inputs (positive values, zero or NaN), this instruction is enough.
587  // We only need a little extra handling after FCVTAS if the input is a negative half value.
588  //
589  // The reason we didn't choose the FCVTPS instruction here is that,
590  // although it performs rounding toward positive infinity, it doesn't round to nearest.
591  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
592  // If we were using this instruction, most inputs would need more handling code.
593   LocationSummary* l = invoke->GetLocations();
594   FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
595   FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
596   Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
597   vixl::aarch64::Label done;
598 
599   // Round to nearest integer, ties away from zero.
600   __ Fcvtas(out_reg, in_reg);
601 
602   // For positive values, zero or NaN inputs, rounding is done.
603   __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
604 
605   // Handle input < 0 cases.
606   // If input is negative but not a tie, previous result (round to nearest) is valid.
607   // If input is a negative tie, out_reg += 1.
608   __ Frinta(tmp_fp, in_reg);
609   __ Fsub(tmp_fp, in_reg, tmp_fp);
610   __ Fcmp(tmp_fp, 0.5);
611   __ Cinc(out_reg, out_reg, eq);
612 
613   __ Bind(&done);
614 }
615 
616 void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
617   CreateFPToIntPlusFPTempLocations(allocator_, invoke);
618 }
619 
620 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
621   GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
622 }
623 
624 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
625   CreateFPToIntPlusFPTempLocations(allocator_, invoke);
626 }
627 
628 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
629   GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
630 }
631 
632 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
633   CreateIntToIntLocations(allocator_, invoke);
634 }
635 
636 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
637   MacroAssembler* masm = GetVIXLAssembler();
638   __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
639           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
640 }
641 
642 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
643   CreateIntToIntLocations(allocator_, invoke);
644 }
645 
646 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
647   MacroAssembler* masm = GetVIXLAssembler();
648   __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
649          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
650 }
651 
652 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
653   CreateIntToIntLocations(allocator_, invoke);
654 }
655 
656 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
657   MacroAssembler* masm = GetVIXLAssembler();
658   __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
659          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
660 }
661 
662 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
663   CreateIntToIntLocations(allocator_, invoke);
664 }
665 
666 void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
667   MacroAssembler* masm = GetVIXLAssembler();
668   __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
669            AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
670 }
671 
672 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
673   LocationSummary* locations =
674       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
675   locations->SetInAt(0, Location::RequiresRegister());
676   locations->SetInAt(1, Location::RequiresRegister());
677 }
678 
679 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
680   CreateIntIntToVoidLocations(allocator_, invoke);
681 }
682 
683 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
684   MacroAssembler* masm = GetVIXLAssembler();
685   __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
686           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
687 }
688 
689 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
690   CreateIntIntToVoidLocations(allocator_, invoke);
691 }
692 
693 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
694   MacroAssembler* masm = GetVIXLAssembler();
695   __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
696          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
697 }
698 
699 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
700   CreateIntIntToVoidLocations(allocator_, invoke);
701 }
702 
703 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
704   MacroAssembler* masm = GetVIXLAssembler();
705   __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
706          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
707 }
708 
709 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
710   CreateIntIntToVoidLocations(allocator_, invoke);
711 }
712 
713 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
714   MacroAssembler* masm = GetVIXLAssembler();
715   __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
716           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
717 }
718 
719 void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
720   LocationSummary* locations =
721       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
722   locations->SetOut(Location::RequiresRegister());
723 }
724 
725 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
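  // The current java.lang.Thread is the peer object of the ART Thread, loaded here
  // from a fixed offset off the thread register (tr).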
726   codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
727                  MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
728 }
729 
730 static void GenUnsafeGet(HInvoke* invoke,
731                          DataType::Type type,
732                          bool is_volatile,
733                          CodeGeneratorARM64* codegen) {
734   LocationSummary* locations = invoke->GetLocations();
735   DCHECK((type == DataType::Type::kInt32) ||
736          (type == DataType::Type::kInt64) ||
737          (type == DataType::Type::kReference));
738   Location base_loc = locations->InAt(1);
739   Register base = WRegisterFrom(base_loc);      // Object pointer.
740   Location offset_loc = locations->InAt(2);
741   Register offset = XRegisterFrom(offset_loc);  // Long offset.
742   Location trg_loc = locations->Out();
743   Register trg = RegisterFrom(trg_loc, type);
744 
745   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
746     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
747     Register temp = WRegisterFrom(locations->GetTemp(0));
748     MacroAssembler* masm = codegen->GetVIXLAssembler();
749     // Piggy-back on the field load path using introspection for the Baker read barrier.
750     __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
751     codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
752                                                    trg_loc,
753                                                    base,
754                                                    MemOperand(temp.X()),
755                                                    /* needs_null_check= */ false,
756                                                    is_volatile);
757   } else {
758     // Other cases.
759     MemOperand mem_op(base.X(), offset);
760     if (is_volatile) {
761       codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true);
762     } else {
763       codegen->Load(type, trg, mem_op);
764     }
765 
766     if (type == DataType::Type::kReference) {
767       DCHECK(trg.IsW());
768       codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
769     }
770   }
771 }
772 
773 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
774   bool can_call = kEmitCompilerReadBarrier &&
775       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
776        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
777   LocationSummary* locations =
778       new (allocator) LocationSummary(invoke,
779                                       can_call
780                                           ? LocationSummary::kCallOnSlowPath
781                                           : LocationSummary::kNoCall,
782                                       kIntrinsified);
783   if (can_call && kUseBakerReadBarrier) {
784     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
785     // We need a temporary register for the read barrier load in order to use
786     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
787     locations->AddTemp(FixedTempLocation());
788   }
789   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
790   locations->SetInAt(1, Location::RequiresRegister());
791   locations->SetInAt(2, Location::RequiresRegister());
792   locations->SetOut(Location::RequiresRegister(),
793                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
794 }
795 
796 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
797   CreateIntIntIntToIntLocations(allocator_, invoke);
798 }
799 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
800   CreateIntIntIntToIntLocations(allocator_, invoke);
801 }
802 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
803   CreateIntIntIntToIntLocations(allocator_, invoke);
804 }
805 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
806   CreateIntIntIntToIntLocations(allocator_, invoke);
807 }
808 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
809   CreateIntIntIntToIntLocations(allocator_, invoke);
810 }
811 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
812   CreateIntIntIntToIntLocations(allocator_, invoke);
813 }
814 
815 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
816   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
817 }
818 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
819   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
820 }
821 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
822   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
823 }
824 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
825   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
826 }
827 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
828   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
829 }
830 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
831   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
832 }
833 
834 static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
835   LocationSummary* locations =
836       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
837   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
838   locations->SetInAt(1, Location::RequiresRegister());
839   locations->SetInAt(2, Location::RequiresRegister());
840   locations->SetInAt(3, Location::RequiresRegister());
841 }
842 
843 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
844   CreateIntIntIntIntToVoid(allocator_, invoke);
845 }
846 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
847   CreateIntIntIntIntToVoid(allocator_, invoke);
848 }
849 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
850   CreateIntIntIntIntToVoid(allocator_, invoke);
851 }
852 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
853   CreateIntIntIntIntToVoid(allocator_, invoke);
854 }
855 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
856   CreateIntIntIntIntToVoid(allocator_, invoke);
857 }
858 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
859   CreateIntIntIntIntToVoid(allocator_, invoke);
860 }
861 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
862   CreateIntIntIntIntToVoid(allocator_, invoke);
863 }
864 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
865   CreateIntIntIntIntToVoid(allocator_, invoke);
866 }
867 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
868   CreateIntIntIntIntToVoid(allocator_, invoke);
869 }
870 
871 static void GenUnsafePut(HInvoke* invoke,
872                          DataType::Type type,
873                          bool is_volatile,
874                          bool is_ordered,
875                          CodeGeneratorARM64* codegen) {
876   LocationSummary* locations = invoke->GetLocations();
877   MacroAssembler* masm = codegen->GetVIXLAssembler();
878 
879   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
880   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
881   Register value = RegisterFrom(locations->InAt(3), type);
882   Register source = value;
883   MemOperand mem_op(base.X(), offset);
884 
885   {
886     // We use a block to end the scratch scope before the write barrier, thus
887     // freeing the temporary registers so they can be used in `MarkGCCard`.
888     UseScratchRegisterScope temps(masm);
889 
890     if (kPoisonHeapReferences && type == DataType::Type::kReference) {
891       DCHECK(value.IsW());
892       Register temp = temps.AcquireW();
893       __ Mov(temp.W(), value.W());
894       codegen->GetAssembler()->PoisonHeapReference(temp.W());
895       source = temp;
896     }
897 
898     if (is_volatile || is_ordered) {
899       codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
900     } else {
901       codegen->Store(type, source, mem_op);
902     }
903   }
904 
905   if (type == DataType::Type::kReference) {
906     bool value_can_be_null = true;  // TODO: Worth finding out this information?
907     codegen->MarkGCCard(base, value, value_can_be_null);
908   }
909 }
910 
911 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
912   GenUnsafePut(invoke,
913                DataType::Type::kInt32,
914                /* is_volatile= */ false,
915                /* is_ordered= */ false,
916                codegen_);
917 }
918 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
919   GenUnsafePut(invoke,
920                DataType::Type::kInt32,
921                /* is_volatile= */ false,
922                /* is_ordered= */ true,
923                codegen_);
924 }
925 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
926   GenUnsafePut(invoke,
927                DataType::Type::kInt32,
928                /* is_volatile= */ true,
929                /* is_ordered= */ false,
930                codegen_);
931 }
932 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
933   GenUnsafePut(invoke,
934                DataType::Type::kReference,
935                /* is_volatile= */ false,
936                /* is_ordered= */ false,
937                codegen_);
938 }
939 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
940   GenUnsafePut(invoke,
941                DataType::Type::kReference,
942                /* is_volatile= */ false,
943                /* is_ordered= */ true,
944                codegen_);
945 }
946 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
947   GenUnsafePut(invoke,
948                DataType::Type::kReference,
949                /* is_volatile= */ true,
950                /* is_ordered= */ false,
951                codegen_);
952 }
953 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
954   GenUnsafePut(invoke,
955                DataType::Type::kInt64,
956                /* is_volatile= */ false,
957                /* is_ordered= */ false,
958                codegen_);
959 }
960 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
961   GenUnsafePut(invoke,
962                DataType::Type::kInt64,
963                /* is_volatile= */ false,
964                /* is_ordered= */ true,
965                codegen_);
966 }
967 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
968   GenUnsafePut(invoke,
969                DataType::Type::kInt64,
970                /* is_volatile= */ true,
971                /* is_ordered= */ false,
972                codegen_);
973 }
974 
975 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
976                                        HInvoke* invoke,
977                                        DataType::Type type) {
978   bool can_call = kEmitCompilerReadBarrier &&
979       kUseBakerReadBarrier &&
980       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
981   LocationSummary* locations =
982       new (allocator) LocationSummary(invoke,
983                                       can_call
984                                           ? LocationSummary::kCallOnSlowPath
985                                           : LocationSummary::kNoCall,
986                                       kIntrinsified);
987   if (can_call) {
988     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
989   }
990   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
991   locations->SetInAt(1, Location::RequiresRegister());
992   locations->SetInAt(2, Location::RequiresRegister());
993   locations->SetInAt(3, Location::RequiresRegister());
994   locations->SetInAt(4, Location::RequiresRegister());
995 
996   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
997   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
998     // We need two non-scratch temporary registers for (Baker) read barrier.
999     locations->AddTemp(Location::RequiresRegister());
1000     locations->AddTemp(Location::RequiresRegister());
1001   }
1002 }
1003 
1004 class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
1005  public:
1006   explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke)
1007       : SlowPathCodeARM64(invoke) {}
1008 
1009   const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; }
1010 
1011   void EmitNativeCode(CodeGenerator* codegen) override {
1012     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1013     Arm64Assembler* assembler = arm64_codegen->GetAssembler();
1014     MacroAssembler* masm = assembler->GetVIXLAssembler();
1015     __ Bind(GetEntryLabel());
1016 
1017     // Get the locations.
1018     LocationSummary* locations = instruction_->GetLocations();
1019     Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
1020     Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
1021     Register expected = WRegisterFrom(locations->InAt(3));          // Expected.
1022     Register value = WRegisterFrom(locations->InAt(4));             // Value.
1023 
1024     Register old_value = WRegisterFrom(locations->GetTemp(0));      // The old value from main path.
1025     Register marked = WRegisterFrom(locations->GetTemp(1));         // The marked old value.
1026 
1027     // Mark the `old_value` from the main path and compare with `expected`. This clobbers the
1028     // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary.
1029     arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value);
1030     __ Cmp(marked, expected);
1031     __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
1032 
1033     // The `old_value` we have read did not match `expected` (which is always a to-space reference)
1034     // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked
1035     // to-space value matched, so the `old_value` must be a from-space reference to the same
1036     // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked
1037     // old value representing the to-space and from-space references for the same object.
1038 
1039     UseScratchRegisterScope temps(masm);
1040     Register tmp_ptr = temps.AcquireX();
1041     Register tmp = temps.AcquireSameSizeAs(value);
1042 
1043     // Recalculate the `tmp_ptr` clobbered above.
1044     __ Add(tmp_ptr, base.X(), Operand(offset));
1045 
1046     // do {
1047     //   tmp_value = [tmp_ptr];
1048     // } while ((tmp_value == expected || tmp == old_value) && failure([tmp_ptr] <- r_new_value));
1049     // result = (tmp_value == expected || tmp == old_value);
1050 
1051     vixl::aarch64::Label loop_head;
1052     __ Bind(&loop_head);
1053     __ Ldaxr(tmp, MemOperand(tmp_ptr));
1054     assembler->MaybeUnpoisonHeapReference(tmp);
1055     __ Cmp(tmp, expected);
1056     __ Ccmp(tmp, old_value, ZFlag, ne);
1057     __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
1058     assembler->MaybePoisonHeapReference(value);
1059     __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr));
1060     assembler->MaybeUnpoisonHeapReference(value);
1061     __ Cbnz(tmp.W(), &loop_head);
1062 
1063     // Z=true from the above CMP+CCMP indicates success.
1064     __ B(GetExitLabel());
1065   }
1066 };
1067 
1068 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1069   Arm64Assembler* assembler = codegen->GetAssembler();
1070   MacroAssembler* masm = assembler->GetVIXLAssembler();
1071   LocationSummary* locations = invoke->GetLocations();
1072 
1073   Register out = WRegisterFrom(locations->Out());                 // Boolean result.
1074   Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
1075   Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
1076   Register expected = RegisterFrom(locations->InAt(3), type);     // Expected.
1077   Register value = RegisterFrom(locations->InAt(4), type);        // Value.
1078 
1079   // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1080   if (type == DataType::Type::kReference) {
1081     // Mark card for object assuming new value is stored.
1082     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1083     codegen->MarkGCCard(base, value, value_can_be_null);
1084   }
1085 
1086   UseScratchRegisterScope temps(masm);
1087   Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
1088   Register old_value;                                              // Value in memory.
1089 
1090   vixl::aarch64::Label exit_loop_label;
1091   vixl::aarch64::Label* exit_loop = &exit_loop_label;
1092   vixl::aarch64::Label* failure = &exit_loop_label;
1093 
1094   if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
1095     // The only read barrier implementation supporting the
1096     // UnsafeCASObject intrinsic is the Baker-style read barriers.
1097     DCHECK(kUseBakerReadBarrier);
1098 
1099     BakerReadBarrierCasSlowPathARM64* slow_path =
1100         new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke);
1101     codegen->AddSlowPath(slow_path);
1102     exit_loop = slow_path->GetExitLabel();
1103     failure = slow_path->GetEntryLabel();
1104     // We need to store the `old_value` in a non-scratch register to make sure
1105     // the Baker read barrier in the slow path does not clobber it.
1106     old_value = WRegisterFrom(locations->GetTemp(0));
1107   } else {
1108     old_value = temps.AcquireSameSizeAs(value);
1109   }
1110 
1111   __ Add(tmp_ptr, base.X(), Operand(offset));
1112 
1113   // do {
1114   //   tmp_value = [tmp_ptr];
1115   // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value));
1116   // result = tmp_value == expected;
1117 
1118   vixl::aarch64::Label loop_head;
1119   __ Bind(&loop_head);
1120   __ Ldaxr(old_value, MemOperand(tmp_ptr));
1121   if (type == DataType::Type::kReference) {
1122     assembler->MaybeUnpoisonHeapReference(old_value);
1123   }
1124   __ Cmp(old_value, expected);
1125   __ B(failure, ne);
1126   if (type == DataType::Type::kReference) {
1127     assembler->MaybePoisonHeapReference(value);
1128   }
1129   __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr));  // Reuse `old_value` for STLXR result.
1130   if (type == DataType::Type::kReference) {
1131     assembler->MaybeUnpoisonHeapReference(value);
1132   }
1133   __ Cbnz(old_value.W(), &loop_head);
1134   __ Bind(exit_loop);
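  // The Z flag indicates whether the CAS succeeded; materialize it as the boolean result.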
1135   __ Cset(out, eq);
1136 }
1137 
1138 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1139   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
1140 }
1141 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1142   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
1143 }
1144 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1145   // The only read barrier implementation that supports the
1146   // UnsafeCASObject intrinsic is the Baker-style read barrier.
1147   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1148     return;
1149   }
1150 
1151   CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
1152 }
1153 
1154 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1155   GenCas(invoke, DataType::Type::kInt32, codegen_);
1156 }
1157 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1158   GenCas(invoke, DataType::Type::kInt64, codegen_);
1159 }
1160 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1161   // The only read barrier implementation that supports the
1162   // UnsafeCASObject intrinsic is the Baker-style read barrier.
1163   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1164 
1165   GenCas(invoke, DataType::Type::kReference, codegen_);
1166 }
1167 
1168 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1169   LocationSummary* locations =
1170       new (allocator_) LocationSummary(invoke,
1171                                        invoke->InputAt(1)->CanBeNull()
1172                                            ? LocationSummary::kCallOnSlowPath
1173                                            : LocationSummary::kNoCall,
1174                                        kIntrinsified);
1175   locations->SetInAt(0, Location::RequiresRegister());
1176   locations->SetInAt(1, Location::RequiresRegister());
1177   locations->AddTemp(Location::RequiresRegister());
1178   locations->AddTemp(Location::RequiresRegister());
1179   locations->AddTemp(Location::RequiresRegister());
1180   // Need a temporary register for the String compression feature.
1181   if (mirror::kUseStringCompression) {
1182     locations->AddTemp(Location::RequiresRegister());
1183   }
1184   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1185 }
1186 
1187 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1188   MacroAssembler* masm = GetVIXLAssembler();
1189   LocationSummary* locations = invoke->GetLocations();
1190 
1191   Register str = InputRegisterAt(invoke, 0);
1192   Register arg = InputRegisterAt(invoke, 1);
1193   DCHECK(str.IsW());
1194   DCHECK(arg.IsW());
1195   Register out = OutputRegister(invoke);
1196 
1197   Register temp0 = WRegisterFrom(locations->GetTemp(0));
1198   Register temp1 = WRegisterFrom(locations->GetTemp(1));
1199   Register temp2 = WRegisterFrom(locations->GetTemp(2));
1200   Register temp3;
1201   if (mirror::kUseStringCompression) {
1202     temp3 = WRegisterFrom(locations->GetTemp(3));
1203   }
1204 
1205   vixl::aarch64::Label loop;
1206   vixl::aarch64::Label find_char_diff;
1207   vixl::aarch64::Label end;
1208   vixl::aarch64::Label different_compression;
1209 
1210   // Get offsets of count and value fields within a string object.
1211   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1212   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1213 
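  // With string compression enabled, the `count` field packs the length and the compression flag
  // together: the flag lives in bit 0 (as asserted further below, 0 means compressed 8-bit
  // characters and 1 means uncompressed 16-bit characters) and the length in the remaining bits.
  // This is why the loaded counts are shifted right by one to recover the lengths and ANDed with
  // 1 to recover the flag.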
1214   // Note that the null check must have been done earlier.
1215   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1216 
1217   // Take slow path and throw if input can be and is null.
1218   SlowPathCodeARM64* slow_path = nullptr;
1219   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1220   if (can_slow_path) {
1221     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1222     codegen_->AddSlowPath(slow_path);
1223     __ Cbz(arg, slow_path->GetEntryLabel());
1224   }
1225 
1226   // Reference equality check, return 0 if same reference.
1227   __ Subs(out, str, arg);
1228   __ B(&end, eq);
1229 
1230   if (mirror::kUseStringCompression) {
1231     // Load `count` fields of this and argument strings.
1232     __ Ldr(temp3, HeapOperand(str, count_offset));
1233     __ Ldr(temp2, HeapOperand(arg, count_offset));
1234     // Clean out compression flag from lengths.
1235     __ Lsr(temp0, temp3, 1u);
1236     __ Lsr(temp1, temp2, 1u);
1237   } else {
1238     // Load lengths of this and argument strings.
1239     __ Ldr(temp0, HeapOperand(str, count_offset));
1240     __ Ldr(temp1, HeapOperand(arg, count_offset));
1241   }
1242   // out = length diff.
1243   __ Subs(out, temp0, temp1);
1244   // temp0 = min(len(str), len(arg)).
1245   __ Csel(temp0, temp1, temp0, ge);
1246   // Shorter string is empty?
1247   __ Cbz(temp0, &end);
1248 
1249   if (mirror::kUseStringCompression) {
1250     // Check that both strings use the same compression style before using this comparison loop.
1251     __ Eor(temp2, temp2, Operand(temp3));
1252     // Interleave with compression flag extraction, which is needed for both paths,
1253     // and also set the flags, which are needed only for the different-compression path.
1254     __ Ands(temp3.W(), temp3.W(), Operand(1));
1255     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
1256   }
1257   // Store offset of string value in preparation for comparison loop.
1258   __ Mov(temp1, value_offset);
1259   if (mirror::kUseStringCompression) {
1260     // For string compression, calculate the number of bytes to compare (not chars).
1261     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1262     __ Lsl(temp0, temp0, temp3);
1263   }
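  // At this point temp0 holds the number of bytes to compare: on this path both strings use the
  // same compression style, so the LSL above doubles the character count when the strings are
  // uncompressed (flag == 1) and leaves it unchanged when they are compressed (flag == 0).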
1264 
1265   UseScratchRegisterScope scratch_scope(masm);
1266   Register temp4 = scratch_scope.AcquireX();
1267 
1268   // Assertions that must hold in order to compare strings 8 bytes at a time.
1269   DCHECK_ALIGNED(value_offset, 8);
1270   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1271 
1272   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1273   DCHECK_EQ(char_size, 2u);
1274 
1275   // Promote temp2 to an X reg, ready for LDR.
1276   temp2 = temp2.X();
1277 
1278   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1279   __ Bind(&loop);
1280   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1281   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1282   __ Cmp(temp4, temp2);
1283   __ B(ne, &find_char_diff);
1284   __ Add(temp1, temp1, char_size * 4);
1285   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1286   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1287   __ B(&loop, hi);
1288   __ B(&end);
1289 
1290   // Promote temp1 to an X reg, ready for EOR.
1291   temp1 = temp1.X();
1292 
1293   // Find the single character difference.
1294   __ Bind(&find_char_diff);
1295   // Get the bit position of the first character that differs.
1296   __ Eor(temp1, temp2, temp4);
1297   __ Rbit(temp1, temp1);
1298   __ Clz(temp1, temp1);
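  // After the RBIT + CLZ sequence, temp1 holds the bit index (0-63) of the least significant
  // differing bit within the 8 loaded bytes; the comparison below shifts it right by 4 (or by 3
  // for compressed strings) to convert it into a character index within the block.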
1299 
1300   // If the number of chars remaining <= the index where the difference occurs (0-3), then
1301   // the difference occurs outside the remaining string data, so just return length diff (out).
1302   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1303   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1304   // unsigned when string compression is disabled.
1305   // When it's enabled, the comparison must be unsigned.
1306   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1307   __ B(ls, &end);
1308 
1309   // Extract the characters and calculate the difference.
1310   if (mirror::kUseStringCompression) {
1311     __ Bic(temp1, temp1, 0x7);
1312     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1313   } else {
1314     __ Bic(temp1, temp1, 0xf);
1315   }
1316   __ Lsr(temp2, temp2, temp1);
1317   __ Lsr(temp4, temp4, temp1);
1318   if (mirror::kUseStringCompression) {
1319     // Prioritize the case of compressed strings and calculate that result first.
1320     __ Uxtb(temp1, temp4);
1321     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1322     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
1323   }
1324   __ Uxth(temp4, temp4);
1325   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1326 
1327   if (mirror::kUseStringCompression) {
1328     __ B(&end);
1329     __ Bind(&different_compression);
1330 
1331     // Comparison for different compression style.
1332     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1333     DCHECK_EQ(c_char_size, 1u);
1334     temp1 = temp1.W();
1335     temp2 = temp2.W();
1336     temp4 = temp4.W();
1337 
1338     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1339     // Note that flags have been set by the `str` compression flag extraction to `temp3`
1340     // before branching to the `different_compression` label.
1341     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
1342     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
1343 
1344     // We want to free up temp3, currently holding the `str` compression flag, for comparison.
1345     // So, we move it to the bottom bit of the iteration count `temp0`, which we then need to treat
1346     // as unsigned. Start by freeing the bit with an LSL and continue further down with a SUB which
1347     // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1348     __ Lsl(temp0, temp0, 1u);
1349 
1350     // Adjust temp1 and temp2 from string pointers to data pointers.
1351     __ Add(temp1, temp1, Operand(value_offset));
1352     __ Add(temp2, temp2, Operand(value_offset));
1353 
1354     // Complete the move of the compression flag.
1355     __ Sub(temp0, temp0, Operand(temp3));
1356 
1357     vixl::aarch64::Label different_compression_loop;
1358     vixl::aarch64::Label different_compression_diff;
1359 
1360     __ Bind(&different_compression_loop);
1361     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1362     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1363     __ Subs(temp4, temp4, Operand(temp3));
1364     __ B(&different_compression_diff, ne);
1365     __ Subs(temp0, temp0, 2);
1366     __ B(&different_compression_loop, hi);
1367     __ B(&end);
1368 
1369     // Calculate the difference.
1370     __ Bind(&different_compression_diff);
1371     __ Tst(temp0, Operand(1));
1372     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1373                   "Expecting 0=compressed, 1=uncompressed");
1374     __ Cneg(out, temp4, ne);
1375   }
1376 
1377   __ Bind(&end);
1378 
1379   if (can_slow_path) {
1380     __ Bind(slow_path->GetExitLabel());
1381   }
1382 }
1383 
1384 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
1385 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1386 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1387 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1388 // to 10 instructions for the unrolled loop.
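// As a rough worked example of these numbers: a 32-byte cutoff covers const strings of up to
// 32 compressed (all-ASCII) characters or 16 uncompressed characters, and comparing 32 bytes
// takes two LDP+LDP+CMP+CCMP+BNE groups, i.e. exactly the 10 instructions allowed above.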
1389 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
1390 
1391 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1392   if (candidate->IsLoadString()) {
1393     HLoadString* load_string = candidate->AsLoadString();
1394     const DexFile& dex_file = load_string->GetDexFile();
1395     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1396   }
1397   return nullptr;
1398 }
1399 
1400 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1401   LocationSummary* locations =
1402       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1403   locations->SetInAt(0, Location::RequiresRegister());
1404   locations->SetInAt(1, Location::RequiresRegister());
1405 
1406   // For the generic implementation and for long const strings we need a temporary.
1407   // We do not need it for short const strings (up to 8 bytes); see the code generation below.
1408   uint32_t const_string_length = 0u;
1409   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1410   if (const_string == nullptr) {
1411     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1412   }
1413   bool is_compressed =
1414       mirror::kUseStringCompression &&
1415       const_string != nullptr &&
1416       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1417   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1418     locations->AddTemp(Location::RequiresRegister());
1419   }
1420 
1421   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1422   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1423   // Then we shall need an extra temporary register instead of the output register.
1424   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1425 }
1426 
1427 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1428   MacroAssembler* masm = GetVIXLAssembler();
1429   LocationSummary* locations = invoke->GetLocations();
1430 
1431   Register str = WRegisterFrom(locations->InAt(0));
1432   Register arg = WRegisterFrom(locations->InAt(1));
1433   Register out = XRegisterFrom(locations->Out());
1434 
1435   UseScratchRegisterScope scratch_scope(masm);
1436   Register temp = scratch_scope.AcquireW();
1437   Register temp1 = scratch_scope.AcquireW();
1438 
1439   vixl::aarch64::Label loop;
1440   vixl::aarch64::Label end;
1441   vixl::aarch64::Label return_true;
1442   vixl::aarch64::Label return_false;
1443 
1444   // Get offsets of count, value, and class fields within a string object.
1445   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1446   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1447   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1448 
1449   // Note that the null check must have been done earlier.
1450   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1451 
1452   StringEqualsOptimizations optimizations(invoke);
1453   if (!optimizations.GetArgumentNotNull()) {
1454     // Check if input is null, return false if it is.
1455     __ Cbz(arg, &return_false);
1456   }
1457 
1458   // Reference equality check, return true if same reference.
1459   __ Cmp(str, arg);
1460   __ B(&return_true, eq);
1461 
1462   if (!optimizations.GetArgumentIsString()) {
1463     // Instanceof check for the argument by comparing class fields.
1464     // All string objects must have the same type since String cannot be subclassed.
1465     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1466     // If the argument is a string object, its class field must be equal to receiver's class field.
1467     //
1468     // As the String class is expected to be non-movable, we can read the class
1469     // field from String.equals' arguments without read barriers.
1470     AssertNonMovableStringClass();
1471     // /* HeapReference<Class> */ temp = str->klass_
1472     __ Ldr(temp, MemOperand(str.X(), class_offset));
1473     // /* HeapReference<Class> */ temp1 = arg->klass_
1474     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1475     // Also, because we use the previously loaded class references only in the
1476     // following comparison, we don't need to unpoison them.
1477     __ Cmp(temp, temp1);
1478     __ B(&return_false, ne);
1479   }
1480 
1481   // Check if one of the inputs is a const string. Do not special-case both strings
1482   // being const; such cases should be handled by constant folding if needed.
1483   uint32_t const_string_length = 0u;
1484   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1485   if (const_string == nullptr) {
1486     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1487     if (const_string != nullptr) {
1488       std::swap(str, arg);  // Make sure the const string is in `str`.
1489     }
1490   }
1491   bool is_compressed =
1492       mirror::kUseStringCompression &&
1493       const_string != nullptr &&
1494       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1495 
1496   if (const_string != nullptr) {
1497     // Load `count` field of the argument string and check if it matches the const string.
1498     // Also compares the compression style; if it differs, return false.
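    // (A single CMP can check both because GetFlaggedCount() presumably yields the raw `count`
    // field value for a string of this length and compression, so a mismatch in either the
    // length or the compression style makes the comparison fail.)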
1499     __ Ldr(temp, MemOperand(arg.X(), count_offset));
1500     // Temporarily release temp1 as we may not be able to embed the flagged count in the CMP immediate.
1501     scratch_scope.Release(temp1);
1502     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1503     temp1 = scratch_scope.AcquireW();
1504     __ B(&return_false, ne);
1505   } else {
1506     // Load `count` fields of this and argument strings.
1507     __ Ldr(temp, MemOperand(str.X(), count_offset));
1508     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1509     // Check if `count` fields are equal, return false if they're not.
1510     // Also compares the compression style; if it differs, return false.
1511     __ Cmp(temp, temp1);
1512     __ B(&return_false, ne);
1513   }
1514 
1515   // Assertions that must hold in order to compare strings 8 bytes at a time.
1516   // Ok to do this because strings are zero-padded to kObjectAlignment.
1517   DCHECK_ALIGNED(value_offset, 8);
1518   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1519 
1520   if (const_string != nullptr &&
1521       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1522                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1523     // Load and compare the contents. Though we know the contents of the short const string
1524     // at compile time, materializing constants may be more code than loading from memory.
1525     int32_t offset = value_offset;
1526     size_t remaining_bytes =
1527         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1528     temp = temp.X();
1529     temp1 = temp1.X();
1530     while (remaining_bytes > sizeof(uint64_t)) {
1531       Register temp2 = XRegisterFrom(locations->GetTemp(0));
1532       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1533       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1534       __ Cmp(temp, temp2);
1535       __ Ccmp(temp1, out, NoFlag, eq);
1536       __ B(&return_false, ne);
1537       offset += 2u * sizeof(uint64_t);
1538       remaining_bytes -= 2u * sizeof(uint64_t);
1539     }
1540     if (remaining_bytes != 0u) {
1541       __ Ldr(temp, MemOperand(str.X(), offset));
1542       __ Ldr(temp1, MemOperand(arg.X(), offset));
1543       __ Cmp(temp, temp1);
1544       __ B(&return_false, ne);
1545     }
1546   } else {
1547     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1548     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1549                   "Expecting 0=compressed, 1=uncompressed");
1550     __ Cbz(temp, &return_true);
1551 
1552     if (mirror::kUseStringCompression) {
1553       // For string compression, calculate the number of bytes to compare (not chars).
1554       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1555       __ And(temp1, temp, Operand(1));    // Extract compression flag.
1556       __ Lsr(temp, temp, 1u);             // Extract length.
1557       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
1558     }
1559 
1560     // Store offset of string value in preparation for comparison loop
1561     __ Mov(temp1, value_offset);
1562 
1563     temp1 = temp1.X();
1564     Register temp2 = XRegisterFrom(locations->GetTemp(0));
1565     // Loop to compare strings 8 bytes at a time starting at the front of the string.
1566     __ Bind(&loop);
1567     __ Ldr(out, MemOperand(str.X(), temp1));
1568     __ Ldr(temp2, MemOperand(arg.X(), temp1));
1569     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1570     __ Cmp(out, temp2);
1571     __ B(&return_false, ne);
1572     // With string compression, we have compared 8 bytes, otherwise 4 chars.
1573     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1574     __ B(&loop, hi);
1575   }
1576 
1577   // Return true and exit the function.
1578   // If the loop does not result in returning false, we return true.
1579   __ Bind(&return_true);
1580   __ Mov(out, 1);
1581   __ B(&end);
1582 
1583   // Return false and exit the function.
1584   __ Bind(&return_false);
1585   __ Mov(out, 0);
1586   __ Bind(&end);
1587 }
1588 
1589 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1590                                        MacroAssembler* masm,
1591                                        CodeGeneratorARM64* codegen,
1592                                        bool start_at_zero) {
1593   LocationSummary* locations = invoke->GetLocations();
1594 
1595   // Note that the null check must have been done earlier.
1596   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1597 
1598   // Check for code points > 0xFFFF. Either do a slow-path check when we don't know statically,
1599   // branch directly to the slow path for a large constant, or omit the slow path for a small constant or a char.
1600   SlowPathCodeARM64* slow_path = nullptr;
1601   HInstruction* code_point = invoke->InputAt(1);
1602   if (code_point->IsIntConstant()) {
1603     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1604       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1605       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1606       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1607       codegen->AddSlowPath(slow_path);
1608       __ B(slow_path->GetEntryLabel());
1609       __ Bind(slow_path->GetExitLabel());
1610       return;
1611     }
1612   } else if (code_point->GetType() != DataType::Type::kUint16) {
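    // The code point is neither a constant nor a char: test whether any bits above the low 16
    // are set. If so, the value cannot be represented as a char and we take the slow path,
    // presumably so that supplementary code points get the generic String.indexOf handling.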
1613     Register char_reg = WRegisterFrom(locations->InAt(1));
1614     __ Tst(char_reg, 0xFFFF0000);
1615     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1616     codegen->AddSlowPath(slow_path);
1617     __ B(ne, slow_path->GetEntryLabel());
1618   }
1619 
1620   if (start_at_zero) {
1621     // Start-index = 0.
1622     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1623     __ Mov(tmp_reg, 0);
1624   }
1625 
1626   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1627   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1628 
1629   if (slow_path != nullptr) {
1630     __ Bind(slow_path->GetExitLabel());
1631   }
1632 }
1633 
1634 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1635   LocationSummary* locations = new (allocator_) LocationSummary(
1636       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1637   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1638   // best to align the inputs accordingly.
1639   InvokeRuntimeCallingConvention calling_convention;
1640   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1641   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1642   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1643 
1644   // Need to send start_index=0.
1645   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1646 }
1647 
1648 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1649   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
1650 }
1651 
1652 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1653   LocationSummary* locations = new (allocator_) LocationSummary(
1654       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1655   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1656   // best to align the inputs accordingly.
1657   InvokeRuntimeCallingConvention calling_convention;
1658   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1659   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1660   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1661   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1662 }
1663 
1664 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1665   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
1666 }
1667 
1668 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1669   LocationSummary* locations = new (allocator_) LocationSummary(
1670       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1671   InvokeRuntimeCallingConvention calling_convention;
1672   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1673   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1674   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1675   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1676   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1677 }
1678 
1679 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1680   MacroAssembler* masm = GetVIXLAssembler();
1681   LocationSummary* locations = invoke->GetLocations();
1682 
1683   Register byte_array = WRegisterFrom(locations->InAt(0));
1684   __ Cmp(byte_array, 0);
1685   SlowPathCodeARM64* slow_path =
1686       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1687   codegen_->AddSlowPath(slow_path);
1688   __ B(eq, slow_path->GetEntryLabel());
1689 
1690   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1691   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1692   __ Bind(slow_path->GetExitLabel());
1693 }
1694 
1695 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1696   LocationSummary* locations =
1697       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1698   InvokeRuntimeCallingConvention calling_convention;
1699   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1700   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1701   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1702   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1703 }
1704 
1705 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1706   // No need to emit code checking whether `locations->InAt(2)` is a null
1707   // pointer, as callers of the native method
1708   //
1709   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1710   //
1711   // all include a null check on `data` before calling that method.
1712   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1713   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1714 }
1715 
1716 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1717   LocationSummary* locations = new (allocator_) LocationSummary(
1718       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1719   InvokeRuntimeCallingConvention calling_convention;
1720   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1721   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1722 }
1723 
1724 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1725   MacroAssembler* masm = GetVIXLAssembler();
1726   LocationSummary* locations = invoke->GetLocations();
1727 
1728   Register string_to_copy = WRegisterFrom(locations->InAt(0));
1729   __ Cmp(string_to_copy, 0);
1730   SlowPathCodeARM64* slow_path =
1731       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1732   codegen_->AddSlowPath(slow_path);
1733   __ B(eq, slow_path->GetEntryLabel());
1734 
1735   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1736   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1737   __ Bind(slow_path->GetExitLabel());
1738 }
1739 
1740 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1741   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1742   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1743   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1744 
1745   LocationSummary* const locations =
1746       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1747   InvokeRuntimeCallingConvention calling_convention;
1748 
1749   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1750   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1751 }
1752 
1753 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1754   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1755   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1756   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1757   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1758 
1759   LocationSummary* const locations =
1760       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1761   InvokeRuntimeCallingConvention calling_convention;
1762 
1763   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1764   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1765   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1766 }
1767 
1768 static void GenFPToFPCall(HInvoke* invoke,
1769                           CodeGeneratorARM64* codegen,
1770                           QuickEntrypointEnum entry) {
1771   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1772 }
1773 
1774 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1775   CreateFPToFPCallLocations(allocator_, invoke);
1776 }
1777 
1778 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1779   GenFPToFPCall(invoke, codegen_, kQuickCos);
1780 }
1781 
1782 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1783   CreateFPToFPCallLocations(allocator_, invoke);
1784 }
1785 
1786 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1787   GenFPToFPCall(invoke, codegen_, kQuickSin);
1788 }
1789 
1790 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1791   CreateFPToFPCallLocations(allocator_, invoke);
1792 }
1793 
1794 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1795   GenFPToFPCall(invoke, codegen_, kQuickAcos);
1796 }
1797 
1798 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1799   CreateFPToFPCallLocations(allocator_, invoke);
1800 }
1801 
1802 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1803   GenFPToFPCall(invoke, codegen_, kQuickAsin);
1804 }
1805 
1806 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1807   CreateFPToFPCallLocations(allocator_, invoke);
1808 }
1809 
1810 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1811   GenFPToFPCall(invoke, codegen_, kQuickAtan);
1812 }
1813 
1814 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1815   CreateFPToFPCallLocations(allocator_, invoke);
1816 }
1817 
1818 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1819   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1820 }
1821 
1822 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1823   CreateFPToFPCallLocations(allocator_, invoke);
1824 }
1825 
1826 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1827   GenFPToFPCall(invoke, codegen_, kQuickCosh);
1828 }
1829 
1830 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1831   CreateFPToFPCallLocations(allocator_, invoke);
1832 }
1833 
1834 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1835   GenFPToFPCall(invoke, codegen_, kQuickExp);
1836 }
1837 
1838 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1839   CreateFPToFPCallLocations(allocator_, invoke);
1840 }
1841 
1842 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1843   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1844 }
1845 
1846 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1847   CreateFPToFPCallLocations(allocator_, invoke);
1848 }
1849 
1850 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1851   GenFPToFPCall(invoke, codegen_, kQuickLog);
1852 }
1853 
1854 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1855   CreateFPToFPCallLocations(allocator_, invoke);
1856 }
1857 
1858 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1859   GenFPToFPCall(invoke, codegen_, kQuickLog10);
1860 }
1861 
1862 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1863   CreateFPToFPCallLocations(allocator_, invoke);
1864 }
1865 
1866 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1867   GenFPToFPCall(invoke, codegen_, kQuickSinh);
1868 }
1869 
1870 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1871   CreateFPToFPCallLocations(allocator_, invoke);
1872 }
1873 
1874 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1875   GenFPToFPCall(invoke, codegen_, kQuickTan);
1876 }
1877 
1878 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1879   CreateFPToFPCallLocations(allocator_, invoke);
1880 }
1881 
1882 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
1883   GenFPToFPCall(invoke, codegen_, kQuickTanh);
1884 }
1885 
1886 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
1887   CreateFPFPToFPCallLocations(allocator_, invoke);
1888 }
1889 
1890 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
1891   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1892 }
1893 
1894 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
1895   CreateFPFPToFPCallLocations(allocator_, invoke);
1896 }
1897 
1898 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
1899   GenFPToFPCall(invoke, codegen_, kQuickPow);
1900 }
1901 
1902 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
1903   CreateFPFPToFPCallLocations(allocator_, invoke);
1904 }
1905 
1906 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
1907   GenFPToFPCall(invoke, codegen_, kQuickHypot);
1908 }
1909 
1910 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
1911   CreateFPFPToFPCallLocations(allocator_, invoke);
1912 }
1913 
1914 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
1915   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1916 }
1917 
1918 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1919   LocationSummary* locations =
1920       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1921   locations->SetInAt(0, Location::RequiresRegister());
1922   locations->SetInAt(1, Location::RequiresRegister());
1923   locations->SetInAt(2, Location::RequiresRegister());
1924   locations->SetInAt(3, Location::RequiresRegister());
1925   locations->SetInAt(4, Location::RequiresRegister());
1926 
1927   locations->AddTemp(Location::RequiresRegister());
1928   locations->AddTemp(Location::RequiresRegister());
1929   locations->AddTemp(Location::RequiresRegister());
1930 }
1931 
1932 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1933   MacroAssembler* masm = GetVIXLAssembler();
1934   LocationSummary* locations = invoke->GetLocations();
1935 
1936   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1937   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1938   DCHECK_EQ(char_size, 2u);
1939 
1940   // Location of data in char array buffer.
1941   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1942 
1943   // Location of char array data in string.
1944   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1945 
1946   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1947   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
1948   Register srcObj = XRegisterFrom(locations->InAt(0));
1949   Register srcBegin = XRegisterFrom(locations->InAt(1));
1950   Register srcEnd = XRegisterFrom(locations->InAt(2));
1951   Register dstObj = XRegisterFrom(locations->InAt(3));
1952   Register dstBegin = XRegisterFrom(locations->InAt(4));
1953 
1954   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
1955   Register num_chr = XRegisterFrom(locations->GetTemp(1));
1956   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
1957 
1958   UseScratchRegisterScope temps(masm);
1959   Register dst_ptr = temps.AcquireX();
1960   Register tmp2 = temps.AcquireX();
1961 
1962   vixl::aarch64::Label done;
1963   vixl::aarch64::Label compressed_string_loop;
1964   __ Sub(num_chr, srcEnd, srcBegin);
1965   // Early out for valid zero-length retrievals.
1966   __ Cbz(num_chr, &done);
1967 
1968   // dst address to start copying to.
1969   __ Add(dst_ptr, dstObj, Operand(data_offset));
1970   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
1971 
1972   // src address to copy from.
1973   __ Add(src_ptr, srcObj, Operand(value_offset));
1974   vixl::aarch64::Label compressed_string_preloop;
1975   if (mirror::kUseStringCompression) {
1976     // Location of count in string.
1977     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1978     // String's length.
1979     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
1980     __ Tbz(tmp2, 0, &compressed_string_preloop);
1981   }
1982   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
1983 
1984   // Do the copy.
1985   vixl::aarch64::Label loop;
1986   vixl::aarch64::Label remainder;
1987 
1988   // Avoid having to repair the value of num_chr on the < 8 character path.
1989   __ Subs(tmp1, num_chr, 8);
1990   __ B(lt, &remainder);
1991 
1992   // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
1993   __ Mov(num_chr, tmp1);
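  // num_chr now holds (number of chars - 8); together with the `Subs ...; B ge` structure of the
  // main loop below, this makes the loop body run once per full group of 8 characters, and the
  // Adds afterwards restores the remaining count (0-7) for the remainder loop.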
1994 
1995   // Main loop, used for longer fetches, loads and stores 8x16-bit characters at a time.
1996   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
1997   __ Bind(&loop);
1998   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
1999   __ Subs(num_chr, num_chr, 8);
2000   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2001   __ B(ge, &loop);
2002 
2003   __ Adds(num_chr, num_chr, 8);
2004   __ B(eq, &done);
2005 
2006   // Main loop for < 8 character case and remainder handling. Loads and stores one
2007   // 16-bit Java character at a time.
2008   __ Bind(&remainder);
2009   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2010   __ Subs(num_chr, num_chr, 1);
2011   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2012   __ B(gt, &remainder);
2013   __ B(&done);
2014 
2015   if (mirror::kUseStringCompression) {
2016     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2017     DCHECK_EQ(c_char_size, 1u);
2018     __ Bind(&compressed_string_preloop);
2019     __ Add(src_ptr, src_ptr, Operand(srcBegin));
2020     // Copy loop for compressed src, expanding one 8-bit character to 16 bits at a time.
2021     __ Bind(&compressed_string_loop);
2022     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2023     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2024     __ Subs(num_chr, num_chr, Operand(1));
2025     __ B(gt, &compressed_string_loop);
2026   }
2027 
2028   __ Bind(&done);
2029 }
2030 
2031 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2032 // implementation there for longer copy lengths.
2033 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
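// Copies of more than 32 chars (and, via the unsigned comparison in the code generator, negative
// lengths) are not expanded inline; they either take the slow path or are not intrinsified at all
// when the length is a known constant.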
2034 
2035 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2036                                                uint32_t at,
2037                                                HInstruction* input) {
2038   HIntConstant* const_input = input->AsIntConstant();
2039   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2040     locations->SetInAt(at, Location::RequiresRegister());
2041   } else {
2042     locations->SetInAt(at, Location::RegisterOrConstant(input));
2043   }
2044 }
2045 
2046 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2047   // Check to see if we have known failures that will force us to bail out to the runtime;
2048   // in that case, just generate the runtime call directly.
2049   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2050   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2051 
2052   // The positions must be non-negative.
2053   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2054       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2055     // We will have to fail anyway.
2056     return;
2057   }
2058 
2059   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2060   // native implementation.
2061   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2062   if (length != nullptr) {
2063     int32_t len = length->GetValue();
2064     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2065       // Just call as normal.
2066       return;
2067     }
2068   }
2069 
2070   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2071   LocationSummary* locations =
2072       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2073   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2074   locations->SetInAt(0, Location::RequiresRegister());
2075   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2076   locations->SetInAt(2, Location::RequiresRegister());
2077   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2078   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2079 
2080   locations->AddTemp(Location::RequiresRegister());
2081   locations->AddTemp(Location::RequiresRegister());
2082   locations->AddTemp(Location::RequiresRegister());
2083 }
2084 
2085 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2086                                          const Location& pos,
2087                                          const Register& input,
2088                                          const Location& length,
2089                                          SlowPathCodeARM64* slow_path,
2090                                          const Register& temp,
2091                                          bool length_is_input_length = false) {
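  // The checks below enforce, branching to `slow_path` otherwise, the usual arraycopy
  // precondition for one side of the copy: 0 <= pos && pos <= length(input) &&
  // length(input) - pos >= length, specialized for the constant-position and
  // length-is-input-length cases.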
2092   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2093   if (pos.IsConstant()) {
2094     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2095     if (pos_const == 0) {
2096       if (!length_is_input_length) {
2097         // Check that length(input) >= length.
2098         __ Ldr(temp, MemOperand(input, length_offset));
2099         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2100         __ B(slow_path->GetEntryLabel(), lt);
2101       }
2102     } else {
2103       // Check that length(input) >= pos.
2104       __ Ldr(temp, MemOperand(input, length_offset));
2105       __ Subs(temp, temp, pos_const);
2106       __ B(slow_path->GetEntryLabel(), lt);
2107 
2108       // Check that (length(input) - pos) >= length.
2109       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2110       __ B(slow_path->GetEntryLabel(), lt);
2111     }
2112   } else if (length_is_input_length) {
2113     // The only way the copy can succeed is if pos is zero.
2114     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2115   } else {
2116     // Check that pos >= 0.
2117     Register pos_reg = WRegisterFrom(pos);
2118     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2119 
2120     // Check that pos <= length(input) && (length(input) - pos) >= length.
2121     __ Ldr(temp, MemOperand(input, length_offset));
2122     __ Subs(temp, temp, pos_reg);
2123     // If length(input) >= pos, Ccmp compares (length(input) - pos) with length; otherwise it sets NFlag so that the lt branch (N != V) bails to the slow path.
2124     __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
2125     __ B(slow_path->GetEntryLabel(), lt);
2126   }
2127 }
2128 
2129 // Compute base source address, base destination address, and end
2130 // source address for System.arraycopy* intrinsics in `src_base`,
2131 // `dst_base` and `src_end` respectively.
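// Roughly, for element size S and array data offset D this computes:
//   src_base = src + D + src_pos * S
//   dst_base = dst + D + dst_pos * S
//   src_end  = src_base + copy_length * S
// using immediate forms when positions/length are constants and shifted registers otherwise.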
2132 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2133                                         DataType::Type type,
2134                                         const Register& src,
2135                                         const Location& src_pos,
2136                                         const Register& dst,
2137                                         const Location& dst_pos,
2138                                         const Location& copy_length,
2139                                         const Register& src_base,
2140                                         const Register& dst_base,
2141                                         const Register& src_end) {
2142   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2143   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2144       << "Unexpected element type: " << type;
2145   const int32_t element_size = DataType::Size(type);
2146   const int32_t element_size_shift = DataType::SizeShift(type);
2147   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2148 
2149   if (src_pos.IsConstant()) {
2150     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2151     __ Add(src_base, src, element_size * constant + data_offset);
2152   } else {
2153     __ Add(src_base, src, data_offset);
2154     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2155   }
2156 
2157   if (dst_pos.IsConstant()) {
2158     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2159     __ Add(dst_base, dst, element_size * constant + data_offset);
2160   } else {
2161     __ Add(dst_base, dst, data_offset);
2162     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2163   }
2164 
2165   if (copy_length.IsConstant()) {
2166     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2167     __ Add(src_end, src_base, element_size * constant);
2168   } else {
2169     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2170   }
2171 }
2172 
2173 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2174   MacroAssembler* masm = GetVIXLAssembler();
2175   LocationSummary* locations = invoke->GetLocations();
2176   Register src = XRegisterFrom(locations->InAt(0));
2177   Location src_pos = locations->InAt(1);
2178   Register dst = XRegisterFrom(locations->InAt(2));
2179   Location dst_pos = locations->InAt(3);
2180   Location length = locations->InAt(4);
2181 
2182   SlowPathCodeARM64* slow_path =
2183       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2184   codegen_->AddSlowPath(slow_path);
2185 
2186   // If source and destination are the same, take the slow path. Overlapping copy regions must be
2187   // copied in reverse and we can't know in all cases if it's needed.
2188   __ Cmp(src, dst);
2189   __ B(slow_path->GetEntryLabel(), eq);
2190 
2191   // Bail out if the source is null.
2192   __ Cbz(src, slow_path->GetEntryLabel());
2193 
2194   // Bail out if the destination is null.
2195   __ Cbz(dst, slow_path->GetEntryLabel());
2196 
2197   if (!length.IsConstant()) {
2198     // Merge the following two comparisons into one:
2199     //   If the length is negative, bail out (delegate to libcore's native implementation).
2200     //   If the length > 32 then (currently) prefer libcore's native implementation.
2201     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2202     __ B(slow_path->GetEntryLabel(), hi);
2203   } else {
2204     // We have already checked in the LocationsBuilder for the constant case.
2205     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2206     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2207   }
2208 
2209   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2210   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2211   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2212 
2213   CheckSystemArrayCopyPosition(masm,
2214                                src_pos,
2215                                src,
2216                                length,
2217                                slow_path,
2218                                src_curr_addr,
2219                                false);
2220 
2221   CheckSystemArrayCopyPosition(masm,
2222                                dst_pos,
2223                                dst,
2224                                length,
2225                                slow_path,
2226                                src_curr_addr,
2227                                false);
2228 
2229   src_curr_addr = src_curr_addr.X();
2230   dst_curr_addr = dst_curr_addr.X();
2231   src_stop_addr = src_stop_addr.X();
2232 
2233   GenSystemArrayCopyAddresses(masm,
2234                               DataType::Type::kUint16,
2235                               src,
2236                               src_pos,
2237                               dst,
2238                               dst_pos,
2239                               length,
2240                               src_curr_addr,
2241                               dst_curr_addr,
2242                               src_stop_addr);
2243 
2244   // Iterate over the arrays and do a raw copy of the chars.
2245   const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2246   UseScratchRegisterScope temps(masm);
2247   Register tmp = temps.AcquireW();
2248   vixl::aarch64::Label loop, done;
2249   __ Bind(&loop);
2250   __ Cmp(src_curr_addr, src_stop_addr);
2251   __ B(&done, eq);
2252   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2253   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2254   __ B(&loop);
2255   __ Bind(&done);
2256 
2257   __ Bind(slow_path->GetExitLabel());
2258 }
2259 
2260 // For longer copy lengths, we can choose to use the native implementation instead.
2261 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2262 
2263 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2264 // We want to use two temporary registers in order to reduce the register pressure on arm64.
2265 // So we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2266 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2267   // The only read barrier implementation that supports the
2268   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2269   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2270     return;
2271   }
2272 
2273   // Check to see if we have known failures that will force us to bail out to the runtime;
2274   // in that case, just generate the runtime call directly.
2275   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2276   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2277 
2278   // The positions must be non-negative.
2279   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2280       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2281     // We will have to fail anyway.
2282     return;
2283   }
2284 
2285   // The length must be >= 0.
2286   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2287   if (length != nullptr) {
2288     int32_t len = length->GetValue();
2289     if (len < 0 || len >= kSystemArrayCopyThreshold) {
2290       // Just call as normal.
2291       return;
2292     }
2293   }
2294 
2295   SystemArrayCopyOptimizations optimizations(invoke);
2296 
2297   if (optimizations.GetDestinationIsSource()) {
2298     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2299       // We only support backward copying if source and destination are the same.
2300       return;
2301     }
2302   }
2303 
2304   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2305     // We currently don't intrinsify primitive copying.
2306     return;
2307   }
2308 
2309   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2310   LocationSummary* locations =
2311       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2312   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2313   locations->SetInAt(0, Location::RequiresRegister());
2314   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2315   locations->SetInAt(2, Location::RequiresRegister());
2316   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2317   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2318 
2319   locations->AddTemp(Location::RequiresRegister());
2320   locations->AddTemp(Location::RequiresRegister());
2321   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2322     // Temporary register IP0, obtained from the VIXL scratch register
2323     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2324     // (because that register is clobbered by ReadBarrierMarkRegX
2325     // entry points). It cannot be used in calls to
2326     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2327     // either. For these reasons, get a third extra temporary register
2328     // from the register allocator.
2329     locations->AddTemp(Location::RequiresRegister());
2330   } else {
2331     // Cases other than Baker read barriers: the third temporary will
2332     // be acquired from the VIXL scratch register pool.
2333   }
2334 }
2335 
2336 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2337   // The only read barrier implementation supporting the
2338   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2339   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2340 
2341   MacroAssembler* masm = GetVIXLAssembler();
2342   LocationSummary* locations = invoke->GetLocations();
2343 
2344   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2345   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2346   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2347   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2348   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2349 
2350   Register src = XRegisterFrom(locations->InAt(0));
2351   Location src_pos = locations->InAt(1);
2352   Register dest = XRegisterFrom(locations->InAt(2));
2353   Location dest_pos = locations->InAt(3);
2354   Location length = locations->InAt(4);
2355   Register temp1 = WRegisterFrom(locations->GetTemp(0));
2356   Location temp1_loc = LocationFrom(temp1);
2357   Register temp2 = WRegisterFrom(locations->GetTemp(1));
2358   Location temp2_loc = LocationFrom(temp2);
2359 
2360   SlowPathCodeARM64* intrinsic_slow_path =
2361       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2362   codegen_->AddSlowPath(intrinsic_slow_path);
2363 
2364   vixl::aarch64::Label conditions_on_positions_validated;
2365   SystemArrayCopyOptimizations optimizations(invoke);
2366 
2367   // If source and destination are the same, we go to slow path if we need to do
2368   // forward copying.
2369   if (src_pos.IsConstant()) {
2370     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2371     if (dest_pos.IsConstant()) {
2372       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2373       if (optimizations.GetDestinationIsSource()) {
2374         // Checked when building locations.
2375         DCHECK_GE(src_pos_constant, dest_pos_constant);
2376       } else if (src_pos_constant < dest_pos_constant) {
2377         __ Cmp(src, dest);
2378         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2379       }
2380       // Checked when building locations.
2381       DCHECK(!optimizations.GetDestinationIsSource()
2382              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2383     } else {
2384       if (!optimizations.GetDestinationIsSource()) {
2385         __ Cmp(src, dest);
2386         __ B(&conditions_on_positions_validated, ne);
2387       }
2388       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2389       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2390     }
2391   } else {
2392     if (!optimizations.GetDestinationIsSource()) {
2393       __ Cmp(src, dest);
2394       __ B(&conditions_on_positions_validated, ne);
2395     }
2396     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2397            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2398     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2399   }
2400 
2401   __ Bind(&conditions_on_positions_validated);
2402 
2403   if (!optimizations.GetSourceIsNotNull()) {
2404     // Bail out if the source is null.
2405     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2406   }
2407 
2408   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2409     // Bail out if the destination is null.
2410     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2411   }
2412 
2413   // We have already checked in the LocationsBuilder for the constant case.
2414   if (!length.IsConstant() &&
2415       !optimizations.GetCountIsSourceLength() &&
2416       !optimizations.GetCountIsDestinationLength()) {
2417     // Merge the following two comparisons into one:
2418     //   If the length is negative, bail out (delegate to libcore's native implementation).
2419     //   If the length >= 128 then (currently) prefer native implementation.
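    // (A negative length, reinterpreted as unsigned, is far above the threshold,
    // so the single unsigned `hs` branch below covers both cases.)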
2420     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2421     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2422   }
2423   // Validity checks: source.
2424   CheckSystemArrayCopyPosition(masm,
2425                                src_pos,
2426                                src,
2427                                length,
2428                                intrinsic_slow_path,
2429                                temp1,
2430                                optimizations.GetCountIsSourceLength());
2431 
2432   // Validity checks: dest.
2433   CheckSystemArrayCopyPosition(masm,
2434                                dest_pos,
2435                                dest,
2436                                length,
2437                                intrinsic_slow_path,
2438                                temp1,
2439                                optimizations.GetCountIsDestinationLength());
2440   {
2441     // We use a block to end the scratch scope before the write barrier, thus
2442     // freeing the temporary registers so they can be used in `MarkGCCard`.
2443     UseScratchRegisterScope temps(masm);
2444     Location temp3_loc;  // Used only for Baker read barrier.
2445     Register temp3;
2446     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2447       temp3_loc = locations->GetTemp(2);
2448       temp3 = WRegisterFrom(temp3_loc);
2449     } else {
2450       temp3 = temps.AcquireW();
2451     }
2452 
2453     if (!optimizations.GetDoesNotNeedTypeCheck()) {
2454       // Check whether all elements of the source array are assignable to the component
2455       // type of the destination array. We do two checks: the classes are the same,
2456       // or the destination is Object[]. If none of these checks succeed, we go to the
2457       // slow path.
2458 
2459       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2460         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2461           // /* HeapReference<Class> */ temp1 = src->klass_
2462           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2463                                                           temp1_loc,
2464                                                           src.W(),
2465                                                           class_offset,
2466                                                           temp3_loc,
2467                                                           /* needs_null_check= */ false,
2468                                                           /* use_load_acquire= */ false);
2469           // Bail out if the source is not a non primitive array.
2470           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2471           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2472                                                           temp1_loc,
2473                                                           temp1,
2474                                                           component_offset,
2475                                                           temp3_loc,
2476                                                           /* needs_null_check= */ false,
2477                                                           /* use_load_acquire= */ false);
2478           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2479           // If heap poisoning is enabled, `temp1` has been unpoisoned
2480           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2481           // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2482           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2483           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2484           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2485         }
2486 
2487         // /* HeapReference<Class> */ temp1 = dest->klass_
2488         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2489                                                         temp1_loc,
2490                                                         dest.W(),
2491                                                         class_offset,
2492                                                         temp3_loc,
2493                                                         /* needs_null_check= */ false,
2494                                                         /* use_load_acquire= */ false);
2495 
2496         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2497           // Bail out if the destination is not a non primitive array.
2498           //
2499           // Register `temp1` is not trashed by the read barrier emitted
2500           // by GenerateFieldLoadWithBakerReadBarrier below, as that
2501           // method produces a call to a ReadBarrierMarkRegX entry point,
2502           // which saves all potentially live registers, including
2503           // temporaries such as `temp1`.
2504           // /* HeapReference<Class> */ temp2 = temp1->component_type_
2505           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2506                                                           temp2_loc,
2507                                                           temp1,
2508                                                           component_offset,
2509                                                           temp3_loc,
2510                                                           /* needs_null_check= */ false,
2511                                                           /* use_load_acquire= */ false);
2512           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2513           // If heap poisoning is enabled, `temp2` has been unpoisoned
2514           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2515           // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2516           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2517           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2518           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2519         }
2520 
2521         // For the same reason given earlier, `temp1` is not trashed by the
2522         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2523         // /* HeapReference<Class> */ temp2 = src->klass_
2524         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2525                                                         temp2_loc,
2526                                                         src.W(),
2527                                                         class_offset,
2528                                                         temp3_loc,
2529                                                         /* needs_null_check= */ false,
2530                                                         /* use_load_acquire= */ false);
2531         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2532         __ Cmp(temp1, temp2);
2533 
2534         if (optimizations.GetDestinationIsTypedObjectArray()) {
2535           vixl::aarch64::Label do_copy;
2536           __ B(&do_copy, eq);
2537           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2538           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2539                                                           temp1_loc,
2540                                                           temp1,
2541                                                           component_offset,
2542                                                           temp3_loc,
2543                                                           /* needs_null_check= */ false,
2544                                                           /* use_load_acquire= */ false);
2545           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2546           // We do not need to emit a read barrier for the following
2547           // heap reference load, as `temp1` is only used in a
2548           // comparison with null below, and this reference is not
2549           // kept afterwards.
2550           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2551           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2552           __ Bind(&do_copy);
2553         } else {
2554           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2555         }
2556       } else {
2557         // Non read barrier code.
2558 
2559         // /* HeapReference<Class> */ temp1 = dest->klass_
2560         __ Ldr(temp1, MemOperand(dest, class_offset));
2561         // /* HeapReference<Class> */ temp2 = src->klass_
2562         __ Ldr(temp2, MemOperand(src, class_offset));
2563         bool did_unpoison = false;
2564         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2565             !optimizations.GetSourceIsNonPrimitiveArray()) {
2566           // One or two of the references need to be unpoisoned. Unpoison them
2567           // both to make the identity check valid.
2568           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2569           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2570           did_unpoison = true;
2571         }
2572 
2573         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2574           // Bail out if the destination is not a non primitive array.
2575           // /* HeapReference<Class> */ temp3 = temp1->component_type_
2576           __ Ldr(temp3, HeapOperand(temp1, component_offset));
2577           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2578           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2579           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2580           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2581           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2582           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2583         }
2584 
2585         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2586           // Bail out if the source is not a non primitive array.
2587           // /* HeapReference<Class> */ temp3 = temp2->component_type_
2588           __ Ldr(temp3, HeapOperand(temp2, component_offset));
2589           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2590           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2591           // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2592           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2593           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2594           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2595         }
2596 
2597         __ Cmp(temp1, temp2);
2598 
2599         if (optimizations.GetDestinationIsTypedObjectArray()) {
2600           vixl::aarch64::Label do_copy;
2601           __ B(&do_copy, eq);
2602           if (!did_unpoison) {
2603             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2604           }
2605           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2606           __ Ldr(temp1, HeapOperand(temp1, component_offset));
2607           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2608           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2609           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2610           // No need to unpoison the result, we're comparing against null.
2611           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2612           __ Bind(&do_copy);
2613         } else {
2614           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2615         }
2616       }
2617     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2618       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2619       // Bail out if the source is not a non primitive array.
2620       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2621         // /* HeapReference<Class> */ temp1 = src->klass_
2622         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2623                                                         temp1_loc,
2624                                                         src.W(),
2625                                                         class_offset,
2626                                                         temp3_loc,
2627                                                         /* needs_null_check= */ false,
2628                                                         /* use_load_acquire= */ false);
2629         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2630         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2631                                                         temp2_loc,
2632                                                         temp1,
2633                                                         component_offset,
2634                                                         temp3_loc,
2635                                                         /* needs_null_check= */ false,
2636                                                         /* use_load_acquire= */ false);
2637         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2638         // If heap poisoning is enabled, `temp2` has been unpoisoned
2639         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2640       } else {
2641         // /* HeapReference<Class> */ temp1 = src->klass_
2642         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2643         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2644         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2645         __ Ldr(temp2, HeapOperand(temp1, component_offset));
2646         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2647         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2648       }
2649       // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2650       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2651       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2652       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2653     }
2654 
2655     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2656       // Constant zero length: no need to emit the copy loop at all.
2657     } else {
2658       Register src_curr_addr = temp1.X();
2659       Register dst_curr_addr = temp2.X();
2660       Register src_stop_addr = temp3.X();
2661       vixl::aarch64::Label done;
2662       const DataType::Type type = DataType::Type::kReference;
2663       const int32_t element_size = DataType::Size(type);
2664 
2665       if (length.IsRegister()) {
2666         // Don't enter the copy loop if the length is zero.
2667         __ Cbz(WRegisterFrom(length), &done);
2668       }
2669 
2670       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2671         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2672 
2673         // SystemArrayCopy implementation for Baker read barriers (see
2674         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2675         //
2676         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2677         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2678         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2679         //   if (is_gray) {
2680         //     // Slow-path copy.
2681         //     do {
2682         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2683         //     } while (src_ptr != end_ptr)
2684         //   } else {
2685         //     // Fast-path copy.
2686         //     do {
2687         //       *dest_ptr++ = *src_ptr++;
2688         //     } while (src_ptr != end_ptr)
2689         //   }
2690 
2691         // Make sure `tmp` is not IP0, as it is clobbered by
2692         // ReadBarrierMarkRegX entry points in
2693         // ReadBarrierSystemArrayCopySlowPathARM64.
2694         DCHECK(temps.IsAvailable(ip0));
2695         temps.Exclude(ip0);
2696         Register tmp = temps.AcquireW();
2697         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2698         // Put IP0 back in the pool so that VIXL has at least one
2699         // scratch register available to emit macro-instructions (note
2700         // that IP1 is already used for `tmp`). Indeed some
2701         // macro-instructions used in GenSystemArrayCopyAddresses
2702         // (invoked hereunder) may require a scratch register (for
2703         // instance to emit a load with a large constant offset).
2704         temps.Include(ip0);
2705 
2706         // /* int32_t */ monitor = src->monitor_
2707         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2708         // /* LockWord */ lock_word = LockWord(monitor)
2709         static_assert(sizeof(LockWord) == sizeof(int32_t),
2710                       "art::LockWord and int32_t have different sizes.");
2711 
2712         // Introduce a dependency on the lock_word including rb_state,
2713         // to prevent load-load reordering, and without using
2714         // a memory barrier (which would be more expensive).
2715         // `src` is unchanged by this operation, but its value now depends
2716         // on `tmp`.
2717         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
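        // (The 32-bit load of the monitor above zero-extends into `tmp.X()`, so
        // `tmp.X() LSR 32` is always zero: the Add leaves `src` unchanged and
        // only creates the register dependency.)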
2718 
2719         // Compute base source address, base destination address, and end
2720         // source address for System.arraycopy* intrinsics in `src_base`,
2721         // `dst_base` and `src_end` respectively.
2722         // Note that `src_curr_addr` is computed from `src` (and
2723         // `src_pos`) here, and thus honors the artificial dependency
2724         // of `src` on `tmp`.
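        // (Roughly: src_base = src + data_offset + src_pos * element_size,
        // dst_base = dest + data_offset + dest_pos * element_size, and
        // src_end = src_base + length * element_size.)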
2725         GenSystemArrayCopyAddresses(masm,
2726                                     type,
2727                                     src,
2728                                     src_pos,
2729                                     dest,
2730                                     dest_pos,
2731                                     length,
2732                                     src_curr_addr,
2733                                     dst_curr_addr,
2734                                     src_stop_addr);
2735 
2736         // Slow path used to copy array when `src` is gray.
2737         SlowPathCodeARM64* read_barrier_slow_path =
2738             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
2739                 invoke, LocationFrom(tmp));
2740         codegen_->AddSlowPath(read_barrier_slow_path);
2741 
2742         // Given the numeric representation, it's enough to check the low bit of the rb_state.
2743         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2744         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2745         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2746 
2747         // Fast-path copy.
2748         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2749         // poison/unpoison.
2750         vixl::aarch64::Label loop;
2751         __ Bind(&loop);
2752         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2753         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2754         __ Cmp(src_curr_addr, src_stop_addr);
2755         __ B(&loop, ne);
2756 
2757         __ Bind(read_barrier_slow_path->GetExitLabel());
2758       } else {
2759         // Non read barrier code.
2760         // Compute base source address, base destination address, and end
2761         // source address for System.arraycopy* intrinsics in `src_base`,
2762         // `dst_base` and `src_end` respectively.
2763         GenSystemArrayCopyAddresses(masm,
2764                                     type,
2765                                     src,
2766                                     src_pos,
2767                                     dest,
2768                                     dest_pos,
2769                                     length,
2770                                     src_curr_addr,
2771                                     dst_curr_addr,
2772                                     src_stop_addr);
2773         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2774         // poison/unpoison.
2775         vixl::aarch64::Label loop;
2776         __ Bind(&loop);
2777         {
2778           Register tmp = temps.AcquireW();
2779           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2780           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2781         }
2782         __ Cmp(src_curr_addr, src_stop_addr);
2783         __ B(&loop, ne);
2784       }
2785       __ Bind(&done);
2786     }
2787   }
2788 
2789   // We only need one card marking on the destination array.
2790   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
2791 
2792   __ Bind(intrinsic_slow_path->GetExitLabel());
2793 }
2794 
2795 static void GenIsInfinite(LocationSummary* locations,
2796                           bool is64bit,
2797                           MacroAssembler* masm) {
2798   Operand infinity;
2799   Register out;
2800 
2801   if (is64bit) {
2802     infinity = kPositiveInfinityDouble;
2803     out = XRegisterFrom(locations->Out());
2804   } else {
2805     infinity = kPositiveInfinityFloat;
2806     out = WRegisterFrom(locations->Out());
2807   }
2808 
2809   const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
2810 
2811   MoveFPToInt(locations, is64bit, masm);
2812   __ Eor(out, out, infinity);
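  // After the Eor, `out` is zero for +infinity and holds only the sign bit for
  // -infinity; any finite or NaN input leaves at least one non-sign bit set.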
2813   // We don't care about the sign bit, so shift left.
2814   __ Cmp(zero, Operand(out, LSL, 1));
2815   __ Cset(out, eq);
2816 }
2817 
2818 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2819   CreateFPToIntLocations(allocator_, invoke);
2820 }
2821 
2822 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2823   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
2824 }
2825 
2826 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2827   CreateFPToIntLocations(allocator_, invoke);
2828 }
2829 
2830 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2831   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
2832 }
2833 
2834 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2835   InvokeRuntimeCallingConvention calling_convention;
2836   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2837       invoke,
2838       codegen_,
2839       calling_convention.GetReturnLocation(DataType::Type::kReference),
2840       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2841 }
2842 
2843 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2844   IntrinsicVisitor::IntegerValueOfInfo info =
2845       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2846   LocationSummary* locations = invoke->GetLocations();
2847   MacroAssembler* masm = GetVIXLAssembler();
2848 
2849   Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
2850   UseScratchRegisterScope temps(masm);
2851   Register temp = temps.AcquireW();
2852   if (invoke->InputAt(0)->IsConstant()) {
2853     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2854     if (static_cast<uint32_t>(value - info.low) < info.length) {
2855       // Just embed the j.l.Integer in the code.
2856       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2857       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2858     } else {
2859       DCHECK(locations->CanCall());
2860       // Allocate and initialize a new j.l.Integer.
2861       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2862       // JIT object table.
2863       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2864                                              info.integer_boot_image_offset);
2865       __ Mov(temp.W(), value);
2866       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
2867       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2868       // one.
2869       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2870     }
2871   } else {
2872     DCHECK(locations->CanCall());
2873     Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
2874     // Check bounds of our cache.
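    // (Subtracting `info.low` and comparing unsigned covers both bounds: values
    // below `info.low` wrap around to large unsigned numbers and take the
    // `allocate` path.)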
2875     __ Add(out.W(), in.W(), -info.low);
2876     __ Cmp(out.W(), info.length);
2877     vixl::aarch64::Label allocate, done;
2878     __ B(&allocate, hs);
2879     // If the value is within the bounds, load the j.l.Integer directly from the array.
2880     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2881     MemOperand source = HeapOperand(
2882         temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
2883     codegen_->Load(DataType::Type::kReference, out, source);
2884     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
2885     __ B(&done);
2886     __ Bind(&allocate);
2887     // Otherwise allocate and initialize a new j.l.Integer.
2888     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2889                                            info.integer_boot_image_offset);
2890     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
2891     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2892     // one.
2893     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2894     __ Bind(&done);
2895   }
2896 }
2897 
2898 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
2899   LocationSummary* locations =
2900       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2901   locations->SetOut(Location::RequiresRegister());
2902 }
2903 
2904 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
2905   MacroAssembler* masm = GetVIXLAssembler();
2906   Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
2907   UseScratchRegisterScope temps(masm);
2908   Register temp = temps.AcquireX();
2909 
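  // Read the thread's interrupted flag with a load-acquire; if it was set,
  // clear it with a store-release so the clear is properly ordered with the load.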
2910   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
2911   __ Ldar(out.W(), MemOperand(temp));
2912 
2913   vixl::aarch64::Label done;
2914   __ Cbz(out.W(), &done);
2915   __ Stlr(wzr, MemOperand(temp));
2916   __ Bind(&done);
2917 }
2918 
2919 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
2920   LocationSummary* locations =
2921       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2922   locations->SetInAt(0, Location::Any());
2923 }
2924 
2925 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2926 
2927 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
2928   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
2929     return;
2930   }
2931 
2932   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
2933                                                                 LocationSummary::kNoCall,
2934                                                                 kIntrinsified);
2935 
2936   locations->SetInAt(0, Location::RequiresRegister());
2937   locations->SetInAt(1, Location::RequiresRegister());
2938   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2939 }
2940 
2941 // Lower the invoke of CRC32.update(int crc, int b).
2942 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
2943   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
2944 
2945   MacroAssembler* masm = GetVIXLAssembler();
2946 
2947   Register crc = InputRegisterAt(invoke, 0);
2948   Register val = InputRegisterAt(invoke, 1);
2949   Register out = OutputRegister(invoke);
2950 
2951   // The general algorithm of the CRC32 calculation is:
2952   //   crc = ~crc
2953   //   result = crc32_for_byte(crc, b)
2954   //   crc = ~result
2955   // It is directly lowered to three instructions.
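  // A C-level sketch of the same computation (illustrative only; `__crc32b`
  // stands for the AArch64 CRC32B operation via the ACLE intrinsic and is an
  // assumption for the sketch, not what this compiler path emits through):
  //   uint32_t Crc32Update(uint32_t crc, uint8_t b) {
  //     crc = ~crc;              // Mvn
  //     crc = __crc32b(crc, b);  // Crc32b
  //     return ~crc;             // Mvn
  //   }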
2956 
2957   UseScratchRegisterScope temps(masm);
2958   Register tmp = temps.AcquireSameSizeAs(out);
2959 
2960   __ Mvn(tmp, crc);
2961   __ Crc32b(tmp, tmp, val);
2962   __ Mvn(out, tmp);
2963 }
2964 
2965 // Generate code using CRC32 instructions which calculates
2966 // a CRC32 value of a byte.
2967 //
2968 // Parameters:
2969 //   masm   - VIXL macro assembler
2970 //   crc    - a register holding an initial CRC value
2971 //   ptr    - a register holding a memory address of bytes
2972 //   length - a register holding a number of bytes to process
2973 //   out    - a register to put a result of calculation
2974 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
2975                                                         const Register& crc,
2976                                                         const Register& ptr,
2977                                                         const Register& length,
2978                                                         const Register& out) {
2979   // The algorithm of CRC32 of bytes is:
2980   //   crc = ~crc
2981   //   process a few first bytes to make the array 8-byte aligned
2982   //   while array has 8 bytes do:
2983   //     crc = crc32_of_8bytes(crc, 8_bytes(array))
2984   //   if array has 4 bytes:
2985   //     crc = crc32_of_4bytes(crc, 4_bytes(array))
2986   //   if array has 2 bytes:
2987   //     crc = crc32_of_2bytes(crc, 2_bytes(array))
2988   //   if array has a byte:
2989   //     crc = crc32_of_byte(crc, 1_byte(array))
2990   //   crc = ~crc
2991 
2992   vixl::aarch64::Label loop, done;
2993   vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
2994   vixl::aarch64::Label aligned2, aligned4, aligned8;
2995 
2996   // Use VIXL scratch registers as the VIXL macro assembler won't use them in
2997   // instructions below.
2998   UseScratchRegisterScope temps(masm);
2999   Register len = temps.AcquireW();
3000   Register array_elem = temps.AcquireW();
3001 
3002   __ Mvn(out, crc);
3003   __ Mov(len, length);
3004 
3005   __ Tbz(ptr, 0, &aligned2);
3006   __ Subs(len, len, 1);
3007   __ B(&done, lo);
3008   __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3009   __ Crc32b(out, out, array_elem);
3010 
3011   __ Bind(&aligned2);
3012   __ Tbz(ptr, 1, &aligned4);
3013   __ Subs(len, len, 2);
3014   __ B(&process_1byte, lo);
3015   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3016   __ Crc32h(out, out, array_elem);
3017 
3018   __ Bind(&aligned4);
3019   __ Tbz(ptr, 2, &aligned8);
3020   __ Subs(len, len, 4);
3021   __ B(&process_2bytes, lo);
3022   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3023   __ Crc32w(out, out, array_elem);
3024 
3025   __ Bind(&aligned8);
3026   __ Subs(len, len, 8);
3027   // If len < 8, process the remaining data by 4 bytes, 2 bytes and a byte.
3028   __ B(&process_4bytes, lo);
3029 
3030   // The main loop processing data by 8 bytes.
3031   __ Bind(&loop);
3032   __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3033   __ Subs(len, len, 8);
3034   __ Crc32x(out, out, array_elem.X());
3035   // if len >= 8, process the next 8 bytes.
3036   __ B(&loop, hs);
3037 
3038   // Process the data which is less than 8 bytes.
3039   // The code generated below works with values of len
3040   // which come in the range [-8, 0].
3041   // The first three bits are used to detect whether 4 bytes or 2 bytes or
3042   // a byte can be processed.
3043   // The checking order is from bit 2 to bit 0:
3044   //  bit 2 is set: at least 4 bytes available
3045   //  bit 1 is set: at least 2 bytes available
3046   //  bit 0 is set: at least a byte available
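  //  For example, with 5 bytes left, len == 5 - 8 == -3 == 0b...11111101:
  //  bit 2 is set (copy 4 bytes) and bit 0 is set (copy the final byte).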
3047   __ Bind(&process_4bytes);
3048   // Goto process_2bytes if less than four bytes available
3049   __ Tbz(len, 2, &process_2bytes);
3050   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3051   __ Crc32w(out, out, array_elem);
3052 
3053   __ Bind(&process_2bytes);
3054   // Goto process_1byte if less than two bytes available
3055   __ Tbz(len, 1, &process_1byte);
3056   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3057   __ Crc32h(out, out, array_elem);
3058 
3059   __ Bind(&process_1byte);
3060   // Goto done if no bytes available
3061   __ Tbz(len, 0, &done);
3062   __ Ldrb(array_elem, MemOperand(ptr));
3063   __ Crc32b(out, out, array_elem);
3064 
3065   __ Bind(&done);
3066   __ Mvn(out, out);
3067 }
3068 
3069 // The threshold for sizes of arrays to use the library provided implementation
3070 // of CRC32.updateBytes instead of the intrinsic.
3071 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3072 
3073 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3074   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3075     return;
3076   }
3077 
3078   LocationSummary* locations =
3079       new (allocator_) LocationSummary(invoke,
3080                                        LocationSummary::kCallOnSlowPath,
3081                                        kIntrinsified);
3082 
3083   locations->SetInAt(0, Location::RequiresRegister());
3084   locations->SetInAt(1, Location::RequiresRegister());
3085   locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3086   locations->SetInAt(3, Location::RequiresRegister());
3087   locations->AddTemp(Location::RequiresRegister());
3088   locations->SetOut(Location::RequiresRegister());
3089 }
3090 
3091 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3092 //
3093 // Note: The intrinsic is not used if len exceeds a threshold.
3094 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3095   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3096 
3097   MacroAssembler* masm = GetVIXLAssembler();
3098   LocationSummary* locations = invoke->GetLocations();
3099 
3100   SlowPathCodeARM64* slow_path =
3101     new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3102   codegen_->AddSlowPath(slow_path);
3103 
3104   Register length = WRegisterFrom(locations->InAt(3));
3105   __ Cmp(length, kCRC32UpdateBytesThreshold);
3106   __ B(slow_path->GetEntryLabel(), hi);
3107 
3108   const uint32_t array_data_offset =
3109       mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3110   Register ptr = XRegisterFrom(locations->GetTemp(0));
3111   Register array = XRegisterFrom(locations->InAt(1));
3112   Location offset = locations->InAt(2);
3113   if (offset.IsConstant()) {
3114     int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3115     __ Add(ptr, array, array_data_offset + offset_value);
3116   } else {
3117     __ Add(ptr, array, array_data_offset);
3118     __ Add(ptr, ptr, XRegisterFrom(offset));
3119   }
3120 
3121   Register crc = WRegisterFrom(locations->InAt(0));
3122   Register out = WRegisterFrom(locations->Out());
3123 
3124   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3125 
3126   __ Bind(slow_path->GetExitLabel());
3127 }
3128 
3129 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3130   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3131     return;
3132   }
3133 
3134   LocationSummary* locations =
3135       new (allocator_) LocationSummary(invoke,
3136                                        LocationSummary::kNoCall,
3137                                        kIntrinsified);
3138 
3139   locations->SetInAt(0, Location::RequiresRegister());
3140   locations->SetInAt(1, Location::RequiresRegister());
3141   locations->SetInAt(2, Location::RequiresRegister());
3142   locations->SetInAt(3, Location::RequiresRegister());
3143   locations->AddTemp(Location::RequiresRegister());
3144   locations->SetOut(Location::RequiresRegister());
3145 }
3146 
3147 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3148 //
3149 // There is no need to generate code checking whether addr is 0.
3150 // The method updateByteBuffer is a private method of java.util.zip.CRC32,
3151 // which guarantees that it is never called from outside the CRC32 class.
3152 // The address of a DirectBuffer is always passed to updateByteBuffer.
3153 // An empty DirectBuffer implementation may use a zero address, but then its
3154 // length must also be zero. The generated code below handles a zero length
3155 // correctly.
3156 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3157   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3158 
3159   MacroAssembler* masm = GetVIXLAssembler();
3160   LocationSummary* locations = invoke->GetLocations();
3161 
3162   Register addr = XRegisterFrom(locations->InAt(1));
3163   Register ptr = XRegisterFrom(locations->GetTemp(0));
3164   __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3165 
3166   Register crc = WRegisterFrom(locations->InAt(0));
3167   Register length = WRegisterFrom(locations->InAt(3));
3168   Register out = WRegisterFrom(locations->Out());
3169   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3170 }
3171 
3172 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
3173 
3174 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3175 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3176 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3177 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3178 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3179 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
3180 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3181 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3182 
3183 // 1.8.
3184 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3185 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3186 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3187 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3188 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3189 
3190 UNREACHABLE_INTRINSICS(ARM64)
3191 
3192 #undef __
3193 
3194 }  // namespace arm64
3195 }  // namespace art
3196