/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

#define __ codegen->GetVIXLAssembler()->

static void MoveFromReturnRegister(Location trg,
                                   DataType::Type type,
                                   CodeGeneratorARM64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
    Register trg_reg = RegisterFrom(trg, type);
    Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
    __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
  } else {
    FPRegister trg_reg = FPRegisterFrom(trg, type);
    FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
    __ Fmov(trg_reg, res_reg);
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
// summary. If an intrinsic modifies those locations before a slowpath call, they must be
// restored!
class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke), invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    {
      // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
      // are no pools emitted.
      vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
      if (invoke_->IsInvokeStaticOrDirect()) {
        codegen->GenerateStaticOrDirectCall(
            invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
      } else {
        codegen->GenerateVirtualCall(
            invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
      }
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  switch (type) {
    case DataType::Type::kInt16:
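      // REV16 reverses the bytes within each 16-bit halfword; SXTH then sign-extends
      // the low halfword so the result is a properly sign-extended Java short.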
      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

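  // A64 has no count-trailing-zeros instruction; bit-reverse the value first so that
  // CLZ then yields the number of trailing zeros of the original input.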
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

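  // A64 has no popcount instruction for general-purpose registers; move the value to a
  // SIMD register, count the set bits per byte with CNT, then sum the bytes with ADDV.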
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

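  // The result is computed as (1 << high_bit) >> CLZ(src). When src == 0, CLZ returns
  // high_bit + 1, and the BIC below clears the bitmask so that the final result is 0.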
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

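  // The lowest set bit of x is x & -x: two's complement negation flips every bit above the
  // lowest set bit while keeping that bit, so the AND isolates it (and yields 0 for x == 0).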
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need some extra handling after FCVTAS if the input is a negative tie (x.5).
  //
  // The reason why we didn't choose the FCVTPS instruction here is that
  // although it performs rounding toward positive infinity, it doesn't perform rounding to
  // nearest. For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using this instruction, for most inputs, more handling code would be needed.
  LocationSummary* l = invoke->GetLocations();
  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, the previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
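  // For example, with in = -2.5: FCVTAS yields -3 (ties away from zero) and FRINTA yields
  // -3.0, so in - FRINTA(in) == 0.5 and the CINC below corrects the result to -2.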
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need two non-scratch temporary registers for (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke) {}

  const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    Arm64Assembler* assembler = arm64_codegen->GetAssembler();
    MacroAssembler* masm = assembler->GetVIXLAssembler();
    __ Bind(GetEntryLabel());

    // Get the locations.
    LocationSummary* locations = instruction_->GetLocations();
    Register base = WRegisterFrom(locations->InAt(1));      // Object pointer.
    Register offset = XRegisterFrom(locations->InAt(2));    // Long offset.
    Register expected = WRegisterFrom(locations->InAt(3));  // Expected.
    Register value = WRegisterFrom(locations->InAt(4));     // Value.

    Register old_value = WRegisterFrom(locations->GetTemp(0));  // The old value from main path.
    Register marked = WRegisterFrom(locations->GetTemp(1));     // The marked old value.

    // Mark the `old_value` from the main path and compare with `expected`. This clobbers the
    // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary.
    arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value);
    __ Cmp(marked, expected);
    __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.

    // The `old_value` we have read did not match `expected` (which is always a to-space reference)
    // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked
    // to-space value matched, so the `old_value` must be a from-space reference to the same
    // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked
    // old value representing the to-space and from-space references for the same object.

    UseScratchRegisterScope temps(masm);
    Register tmp_ptr = temps.AcquireX();
    Register tmp = temps.AcquireSameSizeAs(value);

    // Recalculate the `tmp_ptr` clobbered above.
    __ Add(tmp_ptr, base.X(), Operand(offset));

    // do {
    //   tmp_value = [tmp_ptr];
    // } while ((tmp_value == expected || tmp_value == old_value) && failure([tmp_ptr] <- r_new_value));
    // result = (tmp_value == expected || tmp_value == old_value);

    vixl::aarch64::Label loop_head;
    __ Bind(&loop_head);
    __ Ldaxr(tmp, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(tmp);
    __ Cmp(tmp, expected);
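    // If the CMP above found tmp != expected, also compare tmp against the unmarked old
    // value; otherwise force Z=1 so the branch below falls through to the store.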
    __ Ccmp(tmp, old_value, ZFlag, ne);
    __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
    assembler->MaybePoisonHeapReference(value);
    __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(value);
    __ Cbnz(tmp.W(), &loop_head);

    // Z=true from the above CMP+CCMP indicates success.
    __ B(GetExitLabel());
  }
};

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register out = WRegisterFrom(locations->Out());              // Boolean result.
  Register base = WRegisterFrom(locations->InAt(1));           // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));         // Long offset.
  Register expected = RegisterFrom(locations->InAt(3), type);  // Expected.
  Register value = RegisterFrom(locations->InAt(4), type);     // Value.

  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
  if (type == DataType::Type::kReference) {
    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }

  UseScratchRegisterScope temps(masm);
  Register tmp_ptr = temps.AcquireX();  // Pointer to actual memory.
  Register old_value;                   // Value in memory.

  vixl::aarch64::Label exit_loop_label;
  vixl::aarch64::Label* exit_loop = &exit_loop_label;
  vixl::aarch64::Label* failure = &exit_loop_label;

  if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(kUseBakerReadBarrier);

    BakerReadBarrierCasSlowPathARM64* slow_path =
        new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke);
    codegen->AddSlowPath(slow_path);
    exit_loop = slow_path->GetExitLabel();
    failure = slow_path->GetEntryLabel();
    // We need to store the `old_value` in a non-scratch register to make sure
    // the Baker read barrier in the slow path does not clobber it.
    old_value = WRegisterFrom(locations->GetTemp(0));
  } else {
    old_value = temps.AcquireSameSizeAs(value);
  }

  __ Add(tmp_ptr, base.X(), Operand(offset));

  // do {
  //   tmp_value = [tmp_ptr];
  // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value));
  // result = tmp_value == expected;

  vixl::aarch64::Label loop_head;
  __ Bind(&loop_head);
  __ Ldaxr(old_value, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(old_value);
  }
  __ Cmp(old_value, expected);
  __ B(failure, ne);
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(value);
  }
  __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr));  // Reuse `old_value` for STLXR result.
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(value);
  }
  __ Cbnz(old_value.W(), &loop_head);
  __ Bind(exit_loop);
  __ Cset(out, eq);
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt64, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, DataType::Type::kReference, codegen_);
}

void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need a temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register str = InputRegisterAt(invoke, 0);
  Register arg = InputRegisterAt(invoke, 1);
  DCHECK(str.IsW());
  DCHECK(arg.IsW());
  Register out = OutputRegister(invoke);

  Register temp0 = WRegisterFrom(locations->GetTemp(0));
  Register temp1 = WRegisterFrom(locations->GetTemp(1));
  Register temp2 = WRegisterFrom(locations->GetTemp(2));
  Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = WRegisterFrom(locations->GetTemp(3));
  }

  vixl::aarch64::Label loop;
  vixl::aarch64::Label find_char_diff;
  vixl::aarch64::Label end;
  vixl::aarch64::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take the slow path and throw if the input can be and is null.
  SlowPathCodeARM64* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
    codegen_->AddSlowPath(slow_path);
    __ Cbz(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(&end, eq);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, HeapOperand(str, count_offset));
    __ Ldr(temp2, HeapOperand(arg, count_offset));
    // Clean out the compression flag from the lengths.
1235 __ Lsr(temp0, temp3, 1u);
1236 __ Lsr(temp1, temp2, 1u);
1237 } else {
1238 // Load lengths of this and argument strings.
1239 __ Ldr(temp0, HeapOperand(str, count_offset));
1240 __ Ldr(temp1, HeapOperand(arg, count_offset));
1241 }
1242 // out = length diff.
1243 __ Subs(out, temp0, temp1);
1244 // temp0 = min(len(str), len(arg)).
1245 __ Csel(temp0, temp1, temp0, ge);
1246 // Shorter string is empty?
1247 __ Cbz(temp0, &end);
1248
1249 if (mirror::kUseStringCompression) {
1250 // Check that both strings use the same compression style before using this comparison loop.
1251 __ Eor(temp2, temp2, Operand(temp3));
1252 // Interleave with the compression flag extraction, which is needed for both paths,
1253 // and also set flags, which are needed only for the different-compression path.
1254 __ Ands(temp3.W(), temp3.W(), Operand(1));
1255 __ Tbnz(temp2, 0, &different_compression); // Does not use flags.
1256 }
1257 // Store offset of string value in preparation for comparison loop.
1258 __ Mov(temp1, value_offset);
1259 if (mirror::kUseStringCompression) {
1260 // For string compression, calculate the number of bytes to compare (not chars).
1261 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1262 __ Lsl(temp0, temp0, temp3);
1263 }
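  // Illustrative note: at this point both strings are known to share the same compression
  // style, so `temp3` is 0 for two compressed strings (1 byte per char, the count already
  // equals the byte count) and 1 for two uncompressed strings (the LSL doubles the count
  // into bytes).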
1264
1265 UseScratchRegisterScope scratch_scope(masm);
1266 Register temp4 = scratch_scope.AcquireX();
1267
1268 // Assertions that must hold in order to compare strings 8 bytes at a time.
1269 DCHECK_ALIGNED(value_offset, 8);
1270 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1271
1272 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1273 DCHECK_EQ(char_size, 2u);
1274
1275 // Promote temp2 to an X reg, ready for LDR.
1276 temp2 = temp2.X();
1277
1278 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1279 __ Bind(&loop);
1280 __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1281 __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1282 __ Cmp(temp4, temp2);
1283 __ B(ne, &find_char_diff);
1284 __ Add(temp1, temp1, char_size * 4);
1285 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1286 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1287 __ B(&loop, hi);
1288 __ B(&end);
1289
1290 // Promote temp1 to an X reg, ready for EOR.
1291 temp1 = temp1.X();
1292
1293 // Find the single character difference.
1294 __ Bind(&find_char_diff);
1295 // Get the bit position of the first character that differs.
1296 __ Eor(temp1, temp2, temp4);
1297 __ Rbit(temp1, temp1);
1298 __ Clz(temp1, temp1);
1299
1300 // If the number of chars remaining <= the index where the difference occurs (0-3), then
1301 // the difference occurs outside the remaining string data, so just return length diff (out).
1302 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1303 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1304 // unsigned when string compression is disabled.
1305 // When it's enabled, the comparison must be unsigned.
1306 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1307 __ B(ls, &end);
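  // Worked example (uncompressed): if the lowest differing bit of the two loaded 64-bit
  // words is bit 37, RBIT+CLZ yields 37 and LSR #4 gives character index 2 within this
  // block; the difference is only meaningful if more than 2 characters remain (temp0 > 2),
  // otherwise the B(ls) above jumps to `end`, leaving the length difference in `out`.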
1308
1309 // Extract the characters and calculate the difference.
1310 if (mirror::kUseStringCompression) {
1311 __ Bic(temp1, temp1, 0x7);
1312 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1313 } else {
1314 __ Bic(temp1, temp1, 0xf);
1315 }
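  // The BICs above round the differing-bit index down to a character boundary: bits 0-2
  // are always cleared (byte boundary), and bit 3 is additionally cleared when `temp3` is
  // 1 (uncompressed, half-word boundary); without string compression the single BIC 0xf
  // rounds straight to a half-word boundary.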
1316 __ Lsr(temp2, temp2, temp1);
1317 __ Lsr(temp4, temp4, temp1);
1318 if (mirror::kUseStringCompression) {
1319 // Prioritize the case of compressed strings and calculate such result first.
1320 __ Uxtb(temp1, temp4);
1321 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1322 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done.
1323 }
1324 __ Uxth(temp4, temp4);
1325 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1326
1327 if (mirror::kUseStringCompression) {
1328 __ B(&end);
1329 __ Bind(&different_compression);
1330
1331 // Comparison for different compression style.
1332 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1333 DCHECK_EQ(c_char_size, 1u);
1334 temp1 = temp1.W();
1335 temp2 = temp2.W();
1336 temp4 = temp4.W();
1337
1338 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1339 // Note that flags have been set by the `str` compression flag extraction to `temp3`
1340 // before branching to the `different_compression` label.
1341 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string.
1342 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string.
1343
1344 // We want to free up temp3, currently holding the `str` compression flag, for comparison.
1345 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
1346 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
1347 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1348 __ Lsl(temp0, temp0, 1u);
1349
1350 // Adjust temp1 and temp2 from string pointers to data pointers.
1351 __ Add(temp1, temp1, Operand(value_offset));
1352 __ Add(temp2, temp2, Operand(value_offset));
1353
1354 // Complete the move of the compression flag.
1355 __ Sub(temp0, temp0, Operand(temp3));
1356
1357 vixl::aarch64::Label different_compression_loop;
1358 vixl::aarch64::Label different_compression_diff;
1359
1360 __ Bind(&different_compression_loop);
1361 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1362 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1363 __ Subs(temp4, temp4, Operand(temp3));
1364 __ B(&different_compression_diff, ne);
1365 __ Subs(temp0, temp0, 2);
1366 __ B(&different_compression_loop, hi);
1367 __ B(&end);
1368
1369 // Calculate the difference.
1370 __ Bind(&different_compression_diff);
1371 __ Tst(temp0, Operand(1));
1372 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1373 "Expecting 0=compressed, 1=uncompressed");
1374 __ Cneg(out, temp4, ne);
1375 }
1376
1377 __ Bind(&end);
1378
1379 if (can_slow_path) {
1380 __ Bind(slow_path->GetExitLabel());
1381 }
1382 }
1383
1384 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1385 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1386 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1387 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1388 // to 10 instructions for the unrolled loop.
1389 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
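// Worked example for the cutoff: a 16-character uncompressed literal occupies 32 bytes,
// i.e. two LDP+LDP+CMP+CCMP+BNE groups (10 instructions), which matches the unrolling
// budget above; longer const strings fall back to the generic comparison loop.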
1390
GetConstString(HInstruction * candidate,uint32_t * utf16_length)1391 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1392 if (candidate->IsLoadString()) {
1393 HLoadString* load_string = candidate->AsLoadString();
1394 const DexFile& dex_file = load_string->GetDexFile();
1395 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1396 }
1397 return nullptr;
1398 }
1399
VisitStringEquals(HInvoke * invoke)1400 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1401 LocationSummary* locations =
1402 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1403 locations->SetInAt(0, Location::RequiresRegister());
1404 locations->SetInAt(1, Location::RequiresRegister());
1405
1406 // For the generic implementation and for long const strings we need a temporary.
1407 // We do not need it for short const strings, up to 8 bytes, see code generation below.
1408 uint32_t const_string_length = 0u;
1409 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1410 if (const_string == nullptr) {
1411 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1412 }
1413 bool is_compressed =
1414 mirror::kUseStringCompression &&
1415 const_string != nullptr &&
1416 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1417 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1418 locations->AddTemp(Location::RequiresRegister());
1419 }
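  // In other words, no extra temporary is needed when the whole const string fits in a
  // single 8-byte load: up to 8 characters if compressed (1 byte each) or up to 4
  // characters if uncompressed (2 bytes each); such strings are compared below with one
  // LDR+LDR+CMP sequence using only scratch registers.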
1420
1421 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1422 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1423 // Then we shall need an extra temporary register instead of the output register.
1424 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1425 }
1426
VisitStringEquals(HInvoke * invoke)1427 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1428 MacroAssembler* masm = GetVIXLAssembler();
1429 LocationSummary* locations = invoke->GetLocations();
1430
1431 Register str = WRegisterFrom(locations->InAt(0));
1432 Register arg = WRegisterFrom(locations->InAt(1));
1433 Register out = XRegisterFrom(locations->Out());
1434
1435 UseScratchRegisterScope scratch_scope(masm);
1436 Register temp = scratch_scope.AcquireW();
1437 Register temp1 = scratch_scope.AcquireW();
1438
1439 vixl::aarch64::Label loop;
1440 vixl::aarch64::Label end;
1441 vixl::aarch64::Label return_true;
1442 vixl::aarch64::Label return_false;
1443
1444 // Get offsets of count, value, and class fields within a string object.
1445 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1446 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1447 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1448
1449 // Note that the null check must have been done earlier.
1450 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1451
1452 StringEqualsOptimizations optimizations(invoke);
1453 if (!optimizations.GetArgumentNotNull()) {
1454 // Check if input is null, return false if it is.
1455 __ Cbz(arg, &return_false);
1456 }
1457
1458 // Reference equality check, return true if same reference.
1459 __ Cmp(str, arg);
1460 __ B(&return_true, eq);
1461
1462 if (!optimizations.GetArgumentIsString()) {
1463 // Instanceof check for the argument by comparing class fields.
1464 // All string objects must have the same type since String cannot be subclassed.
1465 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1466 // If the argument is a string object, its class field must be equal to receiver's class field.
1467 //
1468 // As the String class is expected to be non-movable, we can read the class
1469 // field from String.equals' arguments without read barriers.
1470 AssertNonMovableStringClass();
1471 // /* HeapReference<Class> */ temp = str->klass_
1472 __ Ldr(temp, MemOperand(str.X(), class_offset));
1473 // /* HeapReference<Class> */ temp1 = arg->klass_
1474 __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1475 // Also, because we use the previously loaded class references only in the
1476 // following comparison, we don't need to unpoison them.
1477 __ Cmp(temp, temp1);
1478 __ B(&return_false, ne);
1479 }
1480
1481 // Check if one of the inputs is a const string. Do not special-case both strings
1482 // being const; such cases should be handled by constant folding if needed.
1483 uint32_t const_string_length = 0u;
1484 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1485 if (const_string == nullptr) {
1486 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1487 if (const_string != nullptr) {
1488 std::swap(str, arg); // Make sure the const string is in `str`.
1489 }
1490 }
1491 bool is_compressed =
1492 mirror::kUseStringCompression &&
1493 const_string != nullptr &&
1494 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1495
1496 if (const_string != nullptr) {
1497 // Load `count` field of the argument string and check if it matches the const string.
1498 // Also compares the compression style, if differs return false.
1499 __ Ldr(temp, MemOperand(arg.X(), count_offset));
1500 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1501 scratch_scope.Release(temp1);
1502 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1503 temp1 = scratch_scope.AcquireW();
1504 __ B(&return_false, ne);
1505 } else {
1506 // Load `count` fields of this and argument strings.
1507 __ Ldr(temp, MemOperand(str.X(), count_offset));
1508 __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1509 // Check if `count` fields are equal, return false if they're not.
1510 // Also compares the compression style, if differs return false.
1511 __ Cmp(temp, temp1);
1512 __ B(&return_false, ne);
1513 }
1514
1515 // Assertions that must hold in order to compare strings 8 bytes at a time.
1516 // Ok to do this because strings are zero-padded to kObjectAlignment.
1517 DCHECK_ALIGNED(value_offset, 8);
1518 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1519
1520 if (const_string != nullptr &&
1521 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1522 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1523 // Load and compare the contents. Though we know the contents of the short const string
1524 // at compile time, materializing constants may be more code than loading from memory.
1525 int32_t offset = value_offset;
1526 size_t remaining_bytes =
1527 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
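    // Example: a 10-character ASCII (compressed) literal spans 10 bytes, rounded up to 16,
    // giving one LDP+LDP+CMP+CCMP+BNE group below and no trailing single-register compare.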
1528 temp = temp.X();
1529 temp1 = temp1.X();
1530 while (remaining_bytes > sizeof(uint64_t)) {
1531 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1532 __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1533 __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1534 __ Cmp(temp, temp2);
1535 __ Ccmp(temp1, out, NoFlag, eq);
1536 __ B(&return_false, ne);
1537 offset += 2u * sizeof(uint64_t);
1538 remaining_bytes -= 2u * sizeof(uint64_t);
1539 }
1540 if (remaining_bytes != 0u) {
1541 __ Ldr(temp, MemOperand(str.X(), offset));
1542 __ Ldr(temp1, MemOperand(arg.X(), offset));
1543 __ Cmp(temp, temp1);
1544 __ B(&return_false, ne);
1545 }
1546 } else {
1547 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1548 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1549 "Expecting 0=compressed, 1=uncompressed");
1550 __ Cbz(temp, &return_true);
1551
1552 if (mirror::kUseStringCompression) {
1553 // For string compression, calculate the number of bytes to compare (not chars).
1554 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1555 __ And(temp1, temp, Operand(1)); // Extract compression flag.
1556 __ Lsr(temp, temp, 1u); // Extract length.
1557 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare.
1558 }
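    // Illustrative note: the `count` field packs (length << 1) | compression_flag, so an
    // uncompressed string of length 3 stores 7; the AND extracts the flag (1), the LSR the
    // length (3), and the LSL yields 6 bytes to compare.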
1559
1560 // Store offset of string value in preparation for comparison loop
1561 __ Mov(temp1, value_offset);
1562
1563 temp1 = temp1.X();
1564 Register temp2 = XRegisterFrom(locations->GetTemp(0));
1565 // Loop to compare strings 8 bytes at a time starting at the front of the string.
1566 __ Bind(&loop);
1567 __ Ldr(out, MemOperand(str.X(), temp1));
1568 __ Ldr(temp2, MemOperand(arg.X(), temp1));
1569 __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1570 __ Cmp(out, temp2);
1571 __ B(&return_false, ne);
1572 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1573 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1574 __ B(&loop, hi);
1575 }
1576
1577 // Return true and exit the function.
1578 // If the loop does not result in returning false, we return true.
1579 __ Bind(&return_true);
1580 __ Mov(out, 1);
1581 __ B(&end);
1582
1583 // Return false and exit the function.
1584 __ Bind(&return_false);
1585 __ Mov(out, 0);
1586 __ Bind(&end);
1587 }
1588
GenerateVisitStringIndexOf(HInvoke * invoke,MacroAssembler * masm,CodeGeneratorARM64 * codegen,bool start_at_zero)1589 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1590 MacroAssembler* masm,
1591 CodeGeneratorARM64* codegen,
1592 bool start_at_zero) {
1593 LocationSummary* locations = invoke->GetLocations();
1594
1595 // Note that the null check must have been done earlier.
1596 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1597
1598 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1599 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1600 SlowPathCodeARM64* slow_path = nullptr;
1601 HInstruction* code_point = invoke->InputAt(1);
1602 if (code_point->IsIntConstant()) {
1603 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1604 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1605 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1606 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1607 codegen->AddSlowPath(slow_path);
1608 __ B(slow_path->GetEntryLabel());
1609 __ Bind(slow_path->GetExitLabel());
1610 return;
1611 }
1612 } else if (code_point->GetType() != DataType::Type::kUint16) {
1613 Register char_reg = WRegisterFrom(locations->InAt(1));
1614 __ Tst(char_reg, 0xFFFF0000);
1615 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1616 codegen->AddSlowPath(slow_path);
1617 __ B(ne, slow_path->GetEntryLabel());
1618 }
1619
1620 if (start_at_zero) {
1621 // Start-index = 0.
1622 Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1623 __ Mov(tmp_reg, 0);
1624 }
1625
1626 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1627 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1628
1629 if (slow_path != nullptr) {
1630 __ Bind(slow_path->GetExitLabel());
1631 }
1632 }
1633
VisitStringIndexOf(HInvoke * invoke)1634 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1635 LocationSummary* locations = new (allocator_) LocationSummary(
1636 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1637 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1638 // best to align the inputs accordingly.
1639 InvokeRuntimeCallingConvention calling_convention;
1640 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1641 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1642 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1643
1644 // Need to send start_index=0.
1645 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1646 }
1647
VisitStringIndexOf(HInvoke * invoke)1648 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1649 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
1650 }
1651
VisitStringIndexOfAfter(HInvoke * invoke)1652 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1653 LocationSummary* locations = new (allocator_) LocationSummary(
1654 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1655 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1656 // best to align the inputs accordingly.
1657 InvokeRuntimeCallingConvention calling_convention;
1658 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1659 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1660 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1661 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1662 }
1663
VisitStringIndexOfAfter(HInvoke * invoke)1664 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1665 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
1666 }
1667
VisitStringNewStringFromBytes(HInvoke * invoke)1668 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1669 LocationSummary* locations = new (allocator_) LocationSummary(
1670 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1671 InvokeRuntimeCallingConvention calling_convention;
1672 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1673 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1674 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1675 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1676 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1677 }
1678
VisitStringNewStringFromBytes(HInvoke * invoke)1679 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1680 MacroAssembler* masm = GetVIXLAssembler();
1681 LocationSummary* locations = invoke->GetLocations();
1682
1683 Register byte_array = WRegisterFrom(locations->InAt(0));
1684 __ Cmp(byte_array, 0);
1685 SlowPathCodeARM64* slow_path =
1686 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1687 codegen_->AddSlowPath(slow_path);
1688 __ B(eq, slow_path->GetEntryLabel());
1689
1690 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1691 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1692 __ Bind(slow_path->GetExitLabel());
1693 }
1694
VisitStringNewStringFromChars(HInvoke * invoke)1695 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1696 LocationSummary* locations =
1697 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1698 InvokeRuntimeCallingConvention calling_convention;
1699 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1700 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1701 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1702 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1703 }
1704
VisitStringNewStringFromChars(HInvoke * invoke)1705 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1706 // No need to emit code checking whether `locations->InAt(2)` is a null
1707 // pointer, as callers of the native method
1708 //
1709 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1710 //
1711 // all include a null check on `data` before calling that method.
1712 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1713 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1714 }
1715
VisitStringNewStringFromString(HInvoke * invoke)1716 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1717 LocationSummary* locations = new (allocator_) LocationSummary(
1718 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1719 InvokeRuntimeCallingConvention calling_convention;
1720 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1721 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1722 }
1723
VisitStringNewStringFromString(HInvoke * invoke)1724 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1725 MacroAssembler* masm = GetVIXLAssembler();
1726 LocationSummary* locations = invoke->GetLocations();
1727
1728 Register string_to_copy = WRegisterFrom(locations->InAt(0));
1729 __ Cmp(string_to_copy, 0);
1730 SlowPathCodeARM64* slow_path =
1731 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1732 codegen_->AddSlowPath(slow_path);
1733 __ B(eq, slow_path->GetEntryLabel());
1734
1735 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1736 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1737 __ Bind(slow_path->GetExitLabel());
1738 }
1739
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1740 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1741 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1742 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1743 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1744
1745 LocationSummary* const locations =
1746 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1747 InvokeRuntimeCallingConvention calling_convention;
1748
1749 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1750 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1751 }
1752
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)1753 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1754 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1755 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1756 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1757 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1758
1759 LocationSummary* const locations =
1760 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1761 InvokeRuntimeCallingConvention calling_convention;
1762
1763 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1764 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1765 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1766 }
1767
GenFPToFPCall(HInvoke * invoke,CodeGeneratorARM64 * codegen,QuickEntrypointEnum entry)1768 static void GenFPToFPCall(HInvoke* invoke,
1769 CodeGeneratorARM64* codegen,
1770 QuickEntrypointEnum entry) {
1771 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1772 }
1773
VisitMathCos(HInvoke * invoke)1774 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1775 CreateFPToFPCallLocations(allocator_, invoke);
1776 }
1777
VisitMathCos(HInvoke * invoke)1778 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1779 GenFPToFPCall(invoke, codegen_, kQuickCos);
1780 }
1781
VisitMathSin(HInvoke * invoke)1782 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1783 CreateFPToFPCallLocations(allocator_, invoke);
1784 }
1785
VisitMathSin(HInvoke * invoke)1786 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1787 GenFPToFPCall(invoke, codegen_, kQuickSin);
1788 }
1789
VisitMathAcos(HInvoke * invoke)1790 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1791 CreateFPToFPCallLocations(allocator_, invoke);
1792 }
1793
VisitMathAcos(HInvoke * invoke)1794 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1795 GenFPToFPCall(invoke, codegen_, kQuickAcos);
1796 }
1797
VisitMathAsin(HInvoke * invoke)1798 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1799 CreateFPToFPCallLocations(allocator_, invoke);
1800 }
1801
VisitMathAsin(HInvoke * invoke)1802 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1803 GenFPToFPCall(invoke, codegen_, kQuickAsin);
1804 }
1805
VisitMathAtan(HInvoke * invoke)1806 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1807 CreateFPToFPCallLocations(allocator_, invoke);
1808 }
1809
VisitMathAtan(HInvoke * invoke)1810 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1811 GenFPToFPCall(invoke, codegen_, kQuickAtan);
1812 }
1813
VisitMathCbrt(HInvoke * invoke)1814 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1815 CreateFPToFPCallLocations(allocator_, invoke);
1816 }
1817
VisitMathCbrt(HInvoke * invoke)1818 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1819 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1820 }
1821
VisitMathCosh(HInvoke * invoke)1822 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1823 CreateFPToFPCallLocations(allocator_, invoke);
1824 }
1825
VisitMathCosh(HInvoke * invoke)1826 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1827 GenFPToFPCall(invoke, codegen_, kQuickCosh);
1828 }
1829
VisitMathExp(HInvoke * invoke)1830 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1831 CreateFPToFPCallLocations(allocator_, invoke);
1832 }
1833
VisitMathExp(HInvoke * invoke)1834 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1835 GenFPToFPCall(invoke, codegen_, kQuickExp);
1836 }
1837
VisitMathExpm1(HInvoke * invoke)1838 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1839 CreateFPToFPCallLocations(allocator_, invoke);
1840 }
1841
VisitMathExpm1(HInvoke * invoke)1842 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1843 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1844 }
1845
VisitMathLog(HInvoke * invoke)1846 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1847 CreateFPToFPCallLocations(allocator_, invoke);
1848 }
1849
VisitMathLog(HInvoke * invoke)1850 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1851 GenFPToFPCall(invoke, codegen_, kQuickLog);
1852 }
1853
VisitMathLog10(HInvoke * invoke)1854 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1855 CreateFPToFPCallLocations(allocator_, invoke);
1856 }
1857
VisitMathLog10(HInvoke * invoke)1858 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1859 GenFPToFPCall(invoke, codegen_, kQuickLog10);
1860 }
1861
VisitMathSinh(HInvoke * invoke)1862 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1863 CreateFPToFPCallLocations(allocator_, invoke);
1864 }
1865
VisitMathSinh(HInvoke * invoke)1866 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1867 GenFPToFPCall(invoke, codegen_, kQuickSinh);
1868 }
1869
VisitMathTan(HInvoke * invoke)1870 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1871 CreateFPToFPCallLocations(allocator_, invoke);
1872 }
1873
VisitMathTan(HInvoke * invoke)1874 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1875 GenFPToFPCall(invoke, codegen_, kQuickTan);
1876 }
1877
VisitMathTanh(HInvoke * invoke)1878 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1879 CreateFPToFPCallLocations(allocator_, invoke);
1880 }
1881
VisitMathTanh(HInvoke * invoke)1882 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
1883 GenFPToFPCall(invoke, codegen_, kQuickTanh);
1884 }
1885
VisitMathAtan2(HInvoke * invoke)1886 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
1887 CreateFPFPToFPCallLocations(allocator_, invoke);
1888 }
1889
VisitMathAtan2(HInvoke * invoke)1890 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
1891 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1892 }
1893
VisitMathPow(HInvoke * invoke)1894 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
1895 CreateFPFPToFPCallLocations(allocator_, invoke);
1896 }
1897
VisitMathPow(HInvoke * invoke)1898 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
1899 GenFPToFPCall(invoke, codegen_, kQuickPow);
1900 }
1901
VisitMathHypot(HInvoke * invoke)1902 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
1903 CreateFPFPToFPCallLocations(allocator_, invoke);
1904 }
1905
VisitMathHypot(HInvoke * invoke)1906 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
1907 GenFPToFPCall(invoke, codegen_, kQuickHypot);
1908 }
1909
VisitMathNextAfter(HInvoke * invoke)1910 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
1911 CreateFPFPToFPCallLocations(allocator_, invoke);
1912 }
1913
VisitMathNextAfter(HInvoke * invoke)1914 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
1915 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1916 }
1917
VisitStringGetCharsNoCheck(HInvoke * invoke)1918 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1919 LocationSummary* locations =
1920 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1921 locations->SetInAt(0, Location::RequiresRegister());
1922 locations->SetInAt(1, Location::RequiresRegister());
1923 locations->SetInAt(2, Location::RequiresRegister());
1924 locations->SetInAt(3, Location::RequiresRegister());
1925 locations->SetInAt(4, Location::RequiresRegister());
1926
1927 locations->AddTemp(Location::RequiresRegister());
1928 locations->AddTemp(Location::RequiresRegister());
1929 locations->AddTemp(Location::RequiresRegister());
1930 }
1931
VisitStringGetCharsNoCheck(HInvoke * invoke)1932 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1933 MacroAssembler* masm = GetVIXLAssembler();
1934 LocationSummary* locations = invoke->GetLocations();
1935
1936 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1937 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1938 DCHECK_EQ(char_size, 2u);
1939
1940 // Location of data in char array buffer.
1941 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1942
1943 // Location of char array data in string.
1944 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1945
1946 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1947 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
1948 Register srcObj = XRegisterFrom(locations->InAt(0));
1949 Register srcBegin = XRegisterFrom(locations->InAt(1));
1950 Register srcEnd = XRegisterFrom(locations->InAt(2));
1951 Register dstObj = XRegisterFrom(locations->InAt(3));
1952 Register dstBegin = XRegisterFrom(locations->InAt(4));
1953
1954 Register src_ptr = XRegisterFrom(locations->GetTemp(0));
1955 Register num_chr = XRegisterFrom(locations->GetTemp(1));
1956 Register tmp1 = XRegisterFrom(locations->GetTemp(2));
1957
1958 UseScratchRegisterScope temps(masm);
1959 Register dst_ptr = temps.AcquireX();
1960 Register tmp2 = temps.AcquireX();
1961
1962 vixl::aarch64::Label done;
1963 vixl::aarch64::Label compressed_string_loop;
1964 __ Sub(num_chr, srcEnd, srcBegin);
1965 // Early out for valid zero-length retrievals.
1966 __ Cbz(num_chr, &done);
1967
1968 // dst address start to copy to.
1969 __ Add(dst_ptr, dstObj, Operand(data_offset));
1970 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
1971
1972 // src address to copy from.
1973 __ Add(src_ptr, srcObj, Operand(value_offset));
1974 vixl::aarch64::Label compressed_string_preloop;
1975 if (mirror::kUseStringCompression) {
1976 // Location of count in string.
1977 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1978 // String's length.
1979 __ Ldr(tmp2, MemOperand(srcObj, count_offset));
1980 __ Tbz(tmp2, 0, &compressed_string_preloop);
1981 }
1982 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
1983
1984 // Do the copy.
1985 vixl::aarch64::Label loop;
1986 vixl::aarch64::Label remainder;
1987
1988 // Avoid having to repair the value of num_chr on the < 8 character path.
1989 __ Subs(tmp1, num_chr, 8);
1990 __ B(lt, &remainder);
1991
1992 // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
1993 __ Mov(num_chr, tmp1);
1994
1995 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
1996 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
1997 __ Bind(&loop);
1998 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
1999 __ Subs(num_chr, num_chr, 8);
2000 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2001 __ B(ge, &loop);
2002
2003 __ Adds(num_chr, num_chr, 8);
2004 __ B(eq, &done);
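  // Bookkeeping note: the loop exits with num_chr in [-8, -1], so the ADDS restores the
  // count of 0-7 characters still to copy and branches straight to `done` when none remain.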
2005
2006 // Main loop for < 8 character case and remainder handling. Loads and stores one
2007 // 16-bit Java character at a time.
2008 __ Bind(&remainder);
2009 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2010 __ Subs(num_chr, num_chr, 1);
2011 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2012 __ B(gt, &remainder);
2013 __ B(&done);
2014
2015 if (mirror::kUseStringCompression) {
2016 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2017 DCHECK_EQ(c_char_size, 1u);
2018 __ Bind(&compressed_string_preloop);
2019 __ Add(src_ptr, src_ptr, Operand(srcBegin));
2020 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2021 __ Bind(&compressed_string_loop);
2022 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2023 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2024 __ Subs(num_chr, num_chr, Operand(1));
2025 __ B(gt, &compressed_string_loop);
2026 }
2027
2028 __ Bind(&done);
2029 }
2030
2031 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2032 // implementation there for longer copy lengths.
2033 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2034
SetSystemArrayCopyLocationRequires(LocationSummary * locations,uint32_t at,HInstruction * input)2035 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2036 uint32_t at,
2037 HInstruction* input) {
2038 HIntConstant* const_input = input->AsIntConstant();
2039 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2040 locations->SetInAt(at, Location::RequiresRegister());
2041 } else {
2042 locations->SetInAt(at, Location::RegisterOrConstant(input));
2043 }
2044 }
2045
VisitSystemArrayCopyChar(HInvoke * invoke)2046 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2047 // Check to see if we have known failures that will cause us to have to bail out
2048 // to the runtime, and just generate the runtime call directly.
2049 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2050 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2051
2052 // The positions must be non-negative.
2053 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2054 (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2055 // We will have to fail anyways.
2056 return;
2057 }
2058
2059 // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2060 // native implementation.
2061 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2062 if (length != nullptr) {
2063 int32_t len = length->GetValue();
2064 if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2065 // Just call as normal.
2066 return;
2067 }
2068 }
2069
2070 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2071 LocationSummary* locations =
2072 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2073 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2074 locations->SetInAt(0, Location::RequiresRegister());
2075 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2076 locations->SetInAt(2, Location::RequiresRegister());
2077 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2078 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2079
2080 locations->AddTemp(Location::RequiresRegister());
2081 locations->AddTemp(Location::RequiresRegister());
2082 locations->AddTemp(Location::RequiresRegister());
2083 }
2084
CheckSystemArrayCopyPosition(MacroAssembler * masm,const Location & pos,const Register & input,const Location & length,SlowPathCodeARM64 * slow_path,const Register & temp,bool length_is_input_length=false)2085 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2086 const Location& pos,
2087 const Register& input,
2088 const Location& length,
2089 SlowPathCodeARM64* slow_path,
2090 const Register& temp,
2091 bool length_is_input_length = false) {
2092 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2093 if (pos.IsConstant()) {
2094 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2095 if (pos_const == 0) {
2096 if (!length_is_input_length) {
2097 // Check that length(input) >= length.
2098 __ Ldr(temp, MemOperand(input, length_offset));
2099 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2100 __ B(slow_path->GetEntryLabel(), lt);
2101 }
2102 } else {
2103 // Check that length(input) >= pos.
2104 __ Ldr(temp, MemOperand(input, length_offset));
2105 __ Subs(temp, temp, pos_const);
2106 __ B(slow_path->GetEntryLabel(), lt);
2107
2108 // Check that (length(input) - pos) >= length.
2109 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2110 __ B(slow_path->GetEntryLabel(), lt);
2111 }
2112 } else if (length_is_input_length) {
2113 // The only way the copy can succeed is if pos is zero.
2114 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2115 } else {
2116 // Check that pos >= 0.
2117 Register pos_reg = WRegisterFrom(pos);
2118 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2119
2120 // Check that pos <= length(input) && (length(input) - pos) >= length.
2121 __ Ldr(temp, MemOperand(input, length_offset));
2122 __ Subs(temp, temp, pos_reg);
2123 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
2124 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
2125 __ B(slow_path->GetEntryLabel(), lt);
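    // Worked example: length(input) = 10, pos = 4, copy length = 7 gives temp = 6; the CCMP
    // compares 6 with 7 and the LT branch takes the slow path (not enough elements). If pos
    // had exceeded length(input), the SUBS leaves GE false, so the CCMP forces NFlag
    // (N set, V clear) and the same LT branch fires.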
2126 }
2127 }
2128
2129 // Compute base source address, base destination address, and end
2130 // source address for System.arraycopy* intrinsics in `src_base`,
2131 // `dst_base` and `src_end` respectively.
GenSystemArrayCopyAddresses(MacroAssembler * masm,DataType::Type type,const Register & src,const Location & src_pos,const Register & dst,const Location & dst_pos,const Location & copy_length,const Register & src_base,const Register & dst_base,const Register & src_end)2132 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2133 DataType::Type type,
2134 const Register& src,
2135 const Location& src_pos,
2136 const Register& dst,
2137 const Location& dst_pos,
2138 const Location& copy_length,
2139 const Register& src_base,
2140 const Register& dst_base,
2141 const Register& src_end) {
2142 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2143 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2144 << "Unexpected element type: " << type;
2145 const int32_t element_size = DataType::Size(type);
2146 const int32_t element_size_shift = DataType::SizeShift(type);
2147 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2148
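  // The addresses computed below are:
  //   src_base = src + data_offset + src_pos * element_size
  //   dst_base = dst + data_offset + dst_pos * element_size
  //   src_end  = src_base + copy_length * element_size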
2149 if (src_pos.IsConstant()) {
2150 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2151 __ Add(src_base, src, element_size * constant + data_offset);
2152 } else {
2153 __ Add(src_base, src, data_offset);
2154 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2155 }
2156
2157 if (dst_pos.IsConstant()) {
2158 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2159 __ Add(dst_base, dst, element_size * constant + data_offset);
2160 } else {
2161 __ Add(dst_base, dst, data_offset);
2162 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2163 }
2164
2165 if (copy_length.IsConstant()) {
2166 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2167 __ Add(src_end, src_base, element_size * constant);
2168 } else {
2169 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2170 }
2171 }
2172
VisitSystemArrayCopyChar(HInvoke * invoke)2173 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2174 MacroAssembler* masm = GetVIXLAssembler();
2175 LocationSummary* locations = invoke->GetLocations();
2176 Register src = XRegisterFrom(locations->InAt(0));
2177 Location src_pos = locations->InAt(1);
2178 Register dst = XRegisterFrom(locations->InAt(2));
2179 Location dst_pos = locations->InAt(3);
2180 Location length = locations->InAt(4);
2181
2182 SlowPathCodeARM64* slow_path =
2183 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2184 codegen_->AddSlowPath(slow_path);
2185
2186 // If source and destination are the same, take the slow path. Overlapping copy regions must be
2187 // copied in reverse and we can't know in all cases if it's needed.
2188 __ Cmp(src, dst);
2189 __ B(slow_path->GetEntryLabel(), eq);
2190
2191 // Bail out if the source is null.
2192 __ Cbz(src, slow_path->GetEntryLabel());
2193
2194 // Bail out if the destination is null.
2195 __ Cbz(dst, slow_path->GetEntryLabel());
2196
2197 if (!length.IsConstant()) {
2198 // Merge the following two comparisons into one:
2199 // If the length is negative, bail out (delegate to libcore's native implementation).
2200 // If the length > 32 then (currently) prefer libcore's native implementation.
2201 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2202 __ B(slow_path->GetEntryLabel(), hi);
2203 } else {
2204 // We have already checked in the LocationsBuilder for the constant case.
2205 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2206 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2207 }
2208
2209 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2210 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2211 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2212
2213 CheckSystemArrayCopyPosition(masm,
2214 src_pos,
2215 src,
2216 length,
2217 slow_path,
2218 src_curr_addr,
2219 /* length_is_input_length= */ false);
2220
2221 CheckSystemArrayCopyPosition(masm,
2222 dst_pos,
2223 dst,
2224 length,
2225 slow_path,
2226 src_curr_addr,
2227 /* length_is_input_length= */ false);
2228
2229 src_curr_addr = src_curr_addr.X();
2230 dst_curr_addr = dst_curr_addr.X();
2231 src_stop_addr = src_stop_addr.X();
2232
2233 GenSystemArrayCopyAddresses(masm,
2234 DataType::Type::kUint16,
2235 src,
2236 src_pos,
2237 dst,
2238 dst_pos,
2239 length,
2240 src_curr_addr,
2241 dst_curr_addr,
2242 src_stop_addr);
2243
2244 // Iterate over the arrays and do a raw copy of the chars.
2245 const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2246 UseScratchRegisterScope temps(masm);
2247 Register tmp = temps.AcquireW();
2248 vixl::aarch64::Label loop, done;
2249 __ Bind(&loop);
2250 __ Cmp(src_curr_addr, src_stop_addr);
2251 __ B(&done, eq);
2252 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2253 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2254 __ B(&loop);
2255 __ Bind(&done);
2256
2257 __ Bind(slow_path->GetExitLabel());
2258 }
2259
2260 // For longer copy lengths we prefer libcore's native implementation.
2261 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2262
2263 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2264 // We want to use only two temporary registers in order to reduce register pressure on arm64,
2265 // so we do not use CodeGenerator::CreateSystemArrayCopyLocationSummary.
VisitSystemArrayCopy(HInvoke * invoke)2266 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2267 // The only read barrier implementation supporting the
2268 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2269 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2270 return;
2271 }
2272
2273 // Check to see if we have known failures that will cause us to have to bail out
2274 // to the runtime, and just generate the runtime call directly.
2275 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2276 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2277
2278 // The positions must be non-negative.
2279 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2280 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2281 // We will have to fail anyways.
2282 return;
2283 }
2284
2285 // The length must be >= 0.
2286 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2287 if (length != nullptr) {
2288 int32_t len = length->GetValue();
2289 if (len < 0 || len >= kSystemArrayCopyThreshold) {
2290 // Just call as normal.
2291 return;
2292 }
2293 }
2294
2295 SystemArrayCopyOptimizations optimizations(invoke);
2296
2297 if (optimizations.GetDestinationIsSource()) {
2298 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2299 // We only support backward copying if source and destination are the same.
2300 return;
2301 }
2302 }
2303
2304 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2305 // We currently don't intrinsify primitive copying.
2306 return;
2307 }
2308
2309 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2310 LocationSummary* locations =
2311 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2312 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2313 locations->SetInAt(0, Location::RequiresRegister());
2314 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2315 locations->SetInAt(2, Location::RequiresRegister());
2316 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2317 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2318
2319 locations->AddTemp(Location::RequiresRegister());
2320 locations->AddTemp(Location::RequiresRegister());
2321 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2322 // Temporary register IP0, obtained from the VIXL scratch register
2323 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2324 // (because that register is clobbered by ReadBarrierMarkRegX
2325 // entry points). It cannot be used in calls to
2326 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2327 // either. For these reasons, get a third extra temporary register
2328 // from the register allocator.
2329 locations->AddTemp(Location::RequiresRegister());
2330 } else {
2331 // Cases other than Baker read barriers: the third temporary will
2332 // be acquired from the VIXL scratch register pool.
2333 }
2334 }
2335
VisitSystemArrayCopy(HInvoke * invoke)2336 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2337 // The only read barrier implementation supporting the
2338 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2339 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2340
2341 MacroAssembler* masm = GetVIXLAssembler();
2342 LocationSummary* locations = invoke->GetLocations();
2343
2344 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2345 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2346 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2347 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2348 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2349
2350 Register src = XRegisterFrom(locations->InAt(0));
2351 Location src_pos = locations->InAt(1);
2352 Register dest = XRegisterFrom(locations->InAt(2));
2353 Location dest_pos = locations->InAt(3);
2354 Location length = locations->InAt(4);
2355 Register temp1 = WRegisterFrom(locations->GetTemp(0));
2356 Location temp1_loc = LocationFrom(temp1);
2357 Register temp2 = WRegisterFrom(locations->GetTemp(1));
2358 Location temp2_loc = LocationFrom(temp2);
2359
2360 SlowPathCodeARM64* intrinsic_slow_path =
2361 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2362 codegen_->AddSlowPath(intrinsic_slow_path);
2363
2364 vixl::aarch64::Label conditions_on_positions_validated;
2365 SystemArrayCopyOptimizations optimizations(invoke);
2366
2367 // If source and destination are the same, we go to slow path if we need to do
2368 // forward copying.
2369 if (src_pos.IsConstant()) {
2370 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2371 if (dest_pos.IsConstant()) {
2372 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2373 if (optimizations.GetDestinationIsSource()) {
2374 // Checked when building locations.
2375 DCHECK_GE(src_pos_constant, dest_pos_constant);
2376 } else if (src_pos_constant < dest_pos_constant) {
2377 __ Cmp(src, dest);
2378 __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2379 }
2380 // Checked when building locations.
2381 DCHECK(!optimizations.GetDestinationIsSource()
2382 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2383 } else {
2384 if (!optimizations.GetDestinationIsSource()) {
2385 __ Cmp(src, dest);
2386 __ B(&conditions_on_positions_validated, ne);
2387 }
2388 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2389 __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2390 }
2391 } else {
2392 if (!optimizations.GetDestinationIsSource()) {
2393 __ Cmp(src, dest);
2394 __ B(&conditions_on_positions_validated, ne);
2395 }
2396 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2397 OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2398 __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2399 }
2400
2401 __ Bind(&conditions_on_positions_validated);
2402
2403 if (!optimizations.GetSourceIsNotNull()) {
2404 // Bail out if the source is null.
2405 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2406 }
2407
2408 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2409 // Bail out if the destination is null.
2410 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2411 }
2412
2413 // We have already checked in the LocationsBuilder for the constant case.
2414 if (!length.IsConstant() &&
2415 !optimizations.GetCountIsSourceLength() &&
2416 !optimizations.GetCountIsDestinationLength()) {
2417 // Merge the following two comparisons into one:
2418 // If the length is negative, bail out (delegate to libcore's native implementation).
2419 // If the length >= 128 then (currently) prefer native implementation.
2420 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2421 __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2422 }
2423 // Validity checks: source.
2424 CheckSystemArrayCopyPosition(masm,
2425 src_pos,
2426 src,
2427 length,
2428 intrinsic_slow_path,
2429 temp1,
2430 optimizations.GetCountIsSourceLength());
2431
2432 // Validity checks: dest.
2433 CheckSystemArrayCopyPosition(masm,
2434 dest_pos,
2435 dest,
2436 length,
2437 intrinsic_slow_path,
2438 temp1,
2439 optimizations.GetCountIsDestinationLength());
2440 {
2441 // We use a block to end the scratch scope before the write barrier, thus
2442 // freeing the temporary registers so they can be used in `MarkGCCard`.
2443 UseScratchRegisterScope temps(masm);
2444 Location temp3_loc; // Used only for Baker read barrier.
2445 Register temp3;
2446 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2447 temp3_loc = locations->GetTemp(2);
2448 temp3 = WRegisterFrom(temp3_loc);
2449 } else {
2450 temp3 = temps.AcquireW();
2451 }
2452
2453 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2454 // Check whether all elements of the source array are assignable to the component
2455 // type of the destination array. We do two checks: the classes are the same,
2456 // or the destination is Object[]. If none of these checks succeed, we go to the
2457 // slow path.
2458
2459 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2460 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2461 // /* HeapReference<Class> */ temp1 = src->klass_
2462 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2463 temp1_loc,
2464 src.W(),
2465 class_offset,
2466 temp3_loc,
2467 /* needs_null_check= */ false,
2468 /* use_load_acquire= */ false);
2469 // Bail out if the source is not a non-primitive array.
2470 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2471 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2472 temp1_loc,
2473 temp1,
2474 component_offset,
2475 temp3_loc,
2476 /* needs_null_check= */ false,
2477 /* use_load_acquire= */ false);
2478 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2479 // If heap poisoning is enabled, `temp1` has been unpoisoned
2480 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2481 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2482 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2483 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2484 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2485 }
2486
2487 // /* HeapReference<Class> */ temp1 = dest->klass_
2488 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2489 temp1_loc,
2490 dest.W(),
2491 class_offset,
2492 temp3_loc,
2493 /* needs_null_check= */ false,
2494 /* use_load_acquire= */ false);
2495
2496 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2497 // Bail out if the destination is not a non primitive array.
2498 //
2499 // Register `temp1` is not trashed by the read barrier emitted
2500 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2501 // method produces a call to a ReadBarrierMarkRegX entry point,
2502 // which saves all potentially live registers, including
2503           // temporaries such as `temp1`.
2504 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2505 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2506 temp2_loc,
2507 temp1,
2508 component_offset,
2509 temp3_loc,
2510 /* needs_null_check= */ false,
2511 /* use_load_acquire= */ false);
2512 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2513 // If heap poisoning is enabled, `temp2` has been unpoisoned
2514           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2515           // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2516 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2517 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2518 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2519 }
2520
2521 // For the same reason given earlier, `temp1` is not trashed by the
2522 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2523 // /* HeapReference<Class> */ temp2 = src->klass_
2524 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2525 temp2_loc,
2526 src.W(),
2527 class_offset,
2528 temp3_loc,
2529 /* needs_null_check= */ false,
2530 /* use_load_acquire= */ false);
2531 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2532 __ Cmp(temp1, temp2);
2533
2534 if (optimizations.GetDestinationIsTypedObjectArray()) {
2535 vixl::aarch64::Label do_copy;
2536 __ B(&do_copy, eq);
2537 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2538 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2539 temp1_loc,
2540 temp1,
2541 component_offset,
2542 temp3_loc,
2543 /* needs_null_check= */ false,
2544 /* use_load_acquire= */ false);
2545 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2546 // We do not need to emit a read barrier for the following
2547 // heap reference load, as `temp1` is only used in a
2548 // comparison with null below, and this reference is not
2549 // kept afterwards.
2550 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2551 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2552 __ Bind(&do_copy);
2553 } else {
2554 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2555 }
2556 } else {
2557 // Non read barrier code.
2558
2559 // /* HeapReference<Class> */ temp1 = dest->klass_
2560 __ Ldr(temp1, MemOperand(dest, class_offset));
2561 // /* HeapReference<Class> */ temp2 = src->klass_
2562 __ Ldr(temp2, MemOperand(src, class_offset));
2563 bool did_unpoison = false;
2564 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2565 !optimizations.GetSourceIsNonPrimitiveArray()) {
2566 // One or two of the references need to be unpoisoned. Unpoison them
2567 // both to make the identity check valid.
2568 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2569 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2570 did_unpoison = true;
2571 }
2572
2573 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2574 // Bail out if the destination is not a non primitive array.
2575 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2576 __ Ldr(temp3, HeapOperand(temp1, component_offset));
2577 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2578 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2579           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2580 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2581 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2582 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2583 }
2584
2585 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2586 // Bail out if the source is not a non primitive array.
2587 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2588 __ Ldr(temp3, HeapOperand(temp2, component_offset));
2589 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2590 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2591           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2592 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2593 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2594 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2595 }
2596
2597 __ Cmp(temp1, temp2);
2598
2599 if (optimizations.GetDestinationIsTypedObjectArray()) {
2600 vixl::aarch64::Label do_copy;
2601 __ B(&do_copy, eq);
2602 if (!did_unpoison) {
2603 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2604 }
2605 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2606 __ Ldr(temp1, HeapOperand(temp1, component_offset));
2607 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2608 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2609 __ Ldr(temp1, HeapOperand(temp1, super_offset));
2610 // No need to unpoison the result, we're comparing against null.
2611 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2612 __ Bind(&do_copy);
2613 } else {
2614 __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2615 }
2616 }
2617 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2618 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2619 // Bail out if the source is not a non primitive array.
2620 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2621 // /* HeapReference<Class> */ temp1 = src->klass_
2622 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2623 temp1_loc,
2624 src.W(),
2625 class_offset,
2626 temp3_loc,
2627 /* needs_null_check= */ false,
2628 /* use_load_acquire= */ false);
2629 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2630 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2631 temp2_loc,
2632 temp1,
2633 component_offset,
2634 temp3_loc,
2635 /* needs_null_check= */ false,
2636 /* use_load_acquire= */ false);
2637 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2638 // If heap poisoning is enabled, `temp2` has been unpoisoned
2639         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2640 } else {
2641 // /* HeapReference<Class> */ temp1 = src->klass_
2642 __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2643 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2644 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2645 __ Ldr(temp2, HeapOperand(temp1, component_offset));
2646 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2647 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2648 }
2649       // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2650 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2651 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2652 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2653 }
2654
2655 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2656     // Constant zero length: no need to emit the loop code at all.
2657 } else {
2658 Register src_curr_addr = temp1.X();
2659 Register dst_curr_addr = temp2.X();
2660 Register src_stop_addr = temp3.X();
2661 vixl::aarch64::Label done;
2662 const DataType::Type type = DataType::Type::kReference;
2663 const int32_t element_size = DataType::Size(type);
2664
2665 if (length.IsRegister()) {
2666         // Don't enter the copy loop if the length is zero.
2667 __ Cbz(WRegisterFrom(length), &done);
2668 }
2669
2670 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2671 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2672
2673 // SystemArrayCopy implementation for Baker read barriers (see
2674 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2675 //
2676         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2677 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2678 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2679 // if (is_gray) {
2680 // // Slow-path copy.
2681 // do {
2682 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2683 // } while (src_ptr != end_ptr)
2684 // } else {
2685 // // Fast-path copy.
2686 // do {
2687 // *dest_ptr++ = *src_ptr++;
2688 // } while (src_ptr != end_ptr)
2689 // }
2690
2691 // Make sure `tmp` is not IP0, as it is clobbered by
2692 // ReadBarrierMarkRegX entry points in
2693 // ReadBarrierSystemArrayCopySlowPathARM64.
2694 DCHECK(temps.IsAvailable(ip0));
2695 temps.Exclude(ip0);
2696 Register tmp = temps.AcquireW();
2697 DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2698 // Put IP0 back in the pool so that VIXL has at least one
2699 // scratch register available to emit macro-instructions (note
2700 // that IP1 is already used for `tmp`). Indeed some
2701 // macro-instructions used in GenSystemArrayCopyAddresses
2702 // (invoked hereunder) may require a scratch register (for
2703 // instance to emit a load with a large constant offset).
2704 temps.Include(ip0);
2705
2706 // /* int32_t */ monitor = src->monitor_
2707 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2708 // /* LockWord */ lock_word = LockWord(monitor)
2709 static_assert(sizeof(LockWord) == sizeof(int32_t),
2710 "art::LockWord and int32_t have different sizes.");
2711
2712 // Introduce a dependency on the lock_word including rb_state,
2713 // to prevent load-load reordering, and without using
2714 // a memory barrier (which would be more expensive).
2715 // `src` is unchanged by this operation, but its value now depends
2716 // on `tmp`.
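        // In effect this computes src = src + (tmp >> 32): the 32-bit load of the
        // lock word zero-extends into the X register, so the shifted-in value is
        // always zero and `src` is numerically unchanged, while a data dependency
        // on the load of `tmp` is created.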
2717 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
2718
2719         // Compute the base source address, base destination address, and end
2720         // source address for the System.arraycopy intrinsic in `src_curr_addr`,
2721         // `dst_curr_addr` and `src_stop_addr` respectively.
2722         // Note that `src_curr_addr` is computed from `src` (and
2723 // `src_pos`) here, and thus honors the artificial dependency
2724 // of `src` on `tmp`.
2725 GenSystemArrayCopyAddresses(masm,
2726 type,
2727 src,
2728 src_pos,
2729 dest,
2730 dest_pos,
2731 length,
2732 src_curr_addr,
2733 dst_curr_addr,
2734 src_stop_addr);
2735
2736 // Slow path used to copy array when `src` is gray.
2737 SlowPathCodeARM64* read_barrier_slow_path =
2738 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
2739 invoke, LocationFrom(tmp));
2740 codegen_->AddSlowPath(read_barrier_slow_path);
2741
2742 // Given the numeric representation, it's enough to check the low bit of the rb_state.
2743 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2744 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
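        // (The `Tbnz` below tests that single bit of the lock word: if it is set,
        //  the source object is gray and the read barrier slow path is taken.)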
2745 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2746
2747 // Fast-path copy.
2748 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2749 // poison/unpoison.
2750 vixl::aarch64::Label loop;
2751 __ Bind(&loop);
2752 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2753 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2754 __ Cmp(src_curr_addr, src_stop_addr);
2755 __ B(&loop, ne);
2756
2757 __ Bind(read_barrier_slow_path->GetExitLabel());
2758 } else {
2759 // Non read barrier code.
2760         // Compute the base source address, base destination address, and end
2761         // source address for the System.arraycopy intrinsic in `src_curr_addr`,
2762         // `dst_curr_addr` and `src_stop_addr` respectively.
2763 GenSystemArrayCopyAddresses(masm,
2764 type,
2765 src,
2766 src_pos,
2767 dest,
2768 dest_pos,
2769 length,
2770 src_curr_addr,
2771 dst_curr_addr,
2772 src_stop_addr);
2773 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2774 // poison/unpoison.
2775 vixl::aarch64::Label loop;
2776 __ Bind(&loop);
2777 {
2778 Register tmp = temps.AcquireW();
2779 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2780 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2781 }
2782 __ Cmp(src_curr_addr, src_stop_addr);
2783 __ B(&loop, ne);
2784 }
2785 __ Bind(&done);
2786 }
2787 }
2788
2789 // We only need one card marking on the destination array.
2790 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
2791
2792 __ Bind(intrinsic_slow_path->GetExitLabel());
2793 }
2794
2795 static void GenIsInfinite(LocationSummary* locations,
2796 bool is64bit,
2797 MacroAssembler* masm) {
2798 Operand infinity;
2799 Register out;
2800
2801 if (is64bit) {
2802 infinity = kPositiveInfinityDouble;
2803 out = XRegisterFrom(locations->Out());
2804 } else {
2805 infinity = kPositiveInfinityFloat;
2806 out = WRegisterFrom(locations->Out());
2807 }
2808
2809 const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
2810
2811 MoveFPToInt(locations, is64bit, masm);
2812 __ Eor(out, out, infinity);
2813 // We don't care about the sign bit, so shift left.
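  // As a rough sketch: at this point out == bits(value) ^ bits(+Inf), and
  // (out << 1) == 0 iff the input was +Inf or -Inf, since the shift discards
  // the sign bit.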
2814 __ Cmp(zero, Operand(out, LSL, 1));
2815 __ Cset(out, eq);
2816 }
2817
2818 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2819 CreateFPToIntLocations(allocator_, invoke);
2820 }
2821
2822 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2823 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
2824 }
2825
2826 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2827 CreateFPToIntLocations(allocator_, invoke);
2828 }
2829
2830 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2831 GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
2832 }
2833
2834 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2835 InvokeRuntimeCallingConvention calling_convention;
2836 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2837 invoke,
2838 codegen_,
2839 calling_convention.GetReturnLocation(DataType::Type::kReference),
2840 Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2841 }
2842
2843 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2844 IntrinsicVisitor::IntegerValueOfInfo info =
2845 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2846 LocationSummary* locations = invoke->GetLocations();
2847 MacroAssembler* masm = GetVIXLAssembler();
2848
2849 Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
2850 UseScratchRegisterScope temps(masm);
2851 Register temp = temps.AcquireW();
2852 if (invoke->InputAt(0)->IsConstant()) {
2853 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2854 if (static_cast<uint32_t>(value - info.low) < info.length) {
2855 // Just embed the j.l.Integer in the code.
2856 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2857 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2858 } else {
2859 DCHECK(locations->CanCall());
2860 // Allocate and initialize a new j.l.Integer.
2861 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2862 // JIT object table.
2863 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2864 info.integer_boot_image_offset);
2865 __ Mov(temp.W(), value);
2866 __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
2867 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2868 // one.
2869 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2870 }
2871 } else {
2872 DCHECK(locations->CanCall());
2873 Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
2874 // Check bounds of our cache.
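    // This is the usual unsigned-range trick: branch to the allocation path when
    // static_cast<uint32_t>(in - info.low) >= info.length (`hs` below is the
    // unsigned >= condition), mirroring the constant-input case above.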
2875 __ Add(out.W(), in.W(), -info.low);
2876 __ Cmp(out.W(), info.length);
2877 vixl::aarch64::Label allocate, done;
2878 __ B(&allocate, hs);
2879 // If the value is within the bounds, load the j.l.Integer directly from the array.
2880 codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2881 MemOperand source = HeapOperand(
2882 temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
2883 codegen_->Load(DataType::Type::kReference, out, source);
2884 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
2885 __ B(&done);
2886 __ Bind(&allocate);
2887 // Otherwise allocate and initialize a new j.l.Integer.
2888 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2889 info.integer_boot_image_offset);
2890 __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
2891 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2892 // one.
2893 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2894 __ Bind(&done);
2895 }
2896 }
2897
2898 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
2899 LocationSummary* locations =
2900 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2901 locations->SetOut(Location::RequiresRegister());
2902 }
2903
2904 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
2905 MacroAssembler* masm = GetVIXLAssembler();
2906 Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
2907 UseScratchRegisterScope temps(masm);
2908 Register temp = temps.AcquireX();
2909
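  // Roughly equivalent to the following sketch (not the actual Thread API):
  //   int32_t value = interrupted_flag.load(std::memory_order_acquire);
  //   if (value != 0) interrupted_flag.store(0, std::memory_order_release);
  //   return value;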
2910 __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
2911 __ Ldar(out.W(), MemOperand(temp));
2912
2913 vixl::aarch64::Label done;
2914 __ Cbz(out.W(), &done);
2915 __ Stlr(wzr, MemOperand(temp));
2916 __ Bind(&done);
2917 }
2918
2919 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
2920 LocationSummary* locations =
2921 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2922 locations->SetInAt(0, Location::Any());
2923 }
2924
2925 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2926
2927 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
2928 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
2929 return;
2930 }
2931
2932 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
2933 LocationSummary::kNoCall,
2934 kIntrinsified);
2935
2936 locations->SetInAt(0, Location::RequiresRegister());
2937 locations->SetInAt(1, Location::RequiresRegister());
2938 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2939 }
2940
2941 // Lower the invoke of CRC32.update(int crc, int b).
2942 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
2943 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
2944
2945 MacroAssembler* masm = GetVIXLAssembler();
2946
2947 Register crc = InputRegisterAt(invoke, 0);
2948 Register val = InputRegisterAt(invoke, 1);
2949 Register out = OutputRegister(invoke);
2950
2951 // The general algorithm of the CRC32 calculation is:
2952 // crc = ~crc
2953 // result = crc32_for_byte(crc, b)
2954 // crc = ~result
2955 // It is directly lowered to three instructions.
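  // As a rough C-style sketch (crc32_byte standing in for the CRC32B instruction,
  // not a real library call): out = ~crc32_byte(~crc, val & 0xff).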
2956
2957 UseScratchRegisterScope temps(masm);
2958 Register tmp = temps.AcquireSameSizeAs(out);
2959
2960 __ Mvn(tmp, crc);
2961 __ Crc32b(tmp, tmp, val);
2962 __ Mvn(out, tmp);
2963 }
2964
2965 // Generate code using CRC32 instructions which calculates
2966 // the CRC32 value of a sequence of bytes.
2967 //
2968 // Parameters:
2969 // masm - VIXL macro assembler
2970 // crc - a register holding an initial CRC value
2971 // ptr - a register holding a memory address of bytes
2972 // length - a register holding a number of bytes to process
2973 // out - a register to put a result of calculation
2974 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
2975 const Register& crc,
2976 const Register& ptr,
2977 const Register& length,
2978 const Register& out) {
2979 // The algorithm of CRC32 of bytes is:
2980 // crc = ~crc
2981 // process a few first bytes to make the array 8-byte aligned
2982 // while array has 8 bytes do:
2983 // crc = crc32_of_8bytes(crc, 8_bytes(array))
2984 // if array has 4 bytes:
2985 // crc = crc32_of_4bytes(crc, 4_bytes(array))
2986 // if array has 2 bytes:
2987 // crc = crc32_of_2bytes(crc, 2_bytes(array))
2988 // if array has a byte:
2989 // crc = crc32_of_byte(crc, 1_byte(array))
2990 // crc = ~crc
2991
2992 vixl::aarch64::Label loop, done;
2993 vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
2994 vixl::aarch64::Label aligned2, aligned4, aligned8;
2995
2996 // Use VIXL scratch registers as the VIXL macro assembler won't use them in
2997 // instructions below.
2998 UseScratchRegisterScope temps(masm);
2999 Register len = temps.AcquireW();
3000 Register array_elem = temps.AcquireW();
3001
3002 __ Mvn(out, crc);
3003 __ Mov(len, length);
3004
3005 __ Tbz(ptr, 0, &aligned2);
3006 __ Subs(len, len, 1);
3007 __ B(&done, lo);
3008 __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
3009 __ Crc32b(out, out, array_elem);
3010
3011 __ Bind(&aligned2);
3012 __ Tbz(ptr, 1, &aligned4);
3013 __ Subs(len, len, 2);
3014 __ B(&process_1byte, lo);
3015 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3016 __ Crc32h(out, out, array_elem);
3017
3018 __ Bind(&aligned4);
3019 __ Tbz(ptr, 2, &aligned8);
3020 __ Subs(len, len, 4);
3021 __ B(&process_2bytes, lo);
3022 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3023 __ Crc32w(out, out, array_elem);
3024
3025 __ Bind(&aligned8);
3026 __ Subs(len, len, 8);
3027 // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
3028 __ B(&process_4bytes, lo);
3029
3030 // The main loop processing data by 8 bytes.
3031 __ Bind(&loop);
3032 __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
3033 __ Subs(len, len, 8);
3034 __ Crc32x(out, out, array_elem.X());
3035 // if len >= 8, process the next 8 bytes.
3036 __ B(&loop, hs);
3037
3038 // Process the data which is less than 8 bytes.
3039 // The code generated below works with values of len
3040 // which come in the range [-8, 0).
3041 // The first three bits are used to detect whether 4 bytes or 2 bytes or
3042 // a byte can be processed.
3043 // The checking order is from bit 2 to bit 0:
3044 // bit 2 is set: at least 4 bytes available
3045 // bit 1 is set: at least 2 bytes available
3046 // bit 0 is set: at least a byte available
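  // For example, with 5 bytes left the earlier `Subs` leaves len == 5 - 8 == -3
  // (binary ...11101), so bit 2 and bit 0 are set: 4 bytes are processed, then 1 byte.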
3047 __ Bind(&process_4bytes);
3048 // Goto process_2bytes if less than four bytes available
3049 __ Tbz(len, 2, &process_2bytes);
3050 __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3051 __ Crc32w(out, out, array_elem);
3052
3053 __ Bind(&process_2bytes);
3054   // Goto process_1byte if less than two bytes available
3055 __ Tbz(len, 1, &process_1byte);
3056 __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3057 __ Crc32h(out, out, array_elem);
3058
3059 __ Bind(&process_1byte);
3060 // Goto done if no bytes available
3061 __ Tbz(len, 0, &done);
3062 __ Ldrb(array_elem, MemOperand(ptr));
3063 __ Crc32b(out, out, array_elem);
3064
3065 __ Bind(&done);
3066 __ Mvn(out, out);
3067 }
3068
3069 // The array-size threshold above which the library-provided implementation of
3070 // CRC32.updateBytes is used instead of the intrinsic.
3071 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3072
3073 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3074 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3075 return;
3076 }
3077
3078 LocationSummary* locations =
3079 new (allocator_) LocationSummary(invoke,
3080 LocationSummary::kCallOnSlowPath,
3081 kIntrinsified);
3082
3083 locations->SetInAt(0, Location::RequiresRegister());
3084 locations->SetInAt(1, Location::RequiresRegister());
3085 locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3086 locations->SetInAt(3, Location::RequiresRegister());
3087 locations->AddTemp(Location::RequiresRegister());
3088 locations->SetOut(Location::RequiresRegister());
3089 }
3090
3091 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3092 //
3093 // Note: The intrinsic is not used if len exceeds a threshold.
3094 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3095 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3096
3097 MacroAssembler* masm = GetVIXLAssembler();
3098 LocationSummary* locations = invoke->GetLocations();
3099
3100 SlowPathCodeARM64* slow_path =
3101 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3102 codegen_->AddSlowPath(slow_path);
3103
3104 Register length = WRegisterFrom(locations->InAt(3));
3105 __ Cmp(length, kCRC32UpdateBytesThreshold);
3106 __ B(slow_path->GetEntryLabel(), hi);
3107
3108 const uint32_t array_data_offset =
3109 mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3110 Register ptr = XRegisterFrom(locations->GetTemp(0));
3111 Register array = XRegisterFrom(locations->InAt(1));
3112 Location offset = locations->InAt(2);
3113 if (offset.IsConstant()) {
3114 int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3115 __ Add(ptr, array, array_data_offset + offset_value);
3116 } else {
3117 __ Add(ptr, array, array_data_offset);
3118 __ Add(ptr, ptr, XRegisterFrom(offset));
3119 }
3120
3121 Register crc = WRegisterFrom(locations->InAt(0));
3122 Register out = WRegisterFrom(locations->Out());
3123
3124 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3125
3126 __ Bind(slow_path->GetExitLabel());
3127 }
3128
3129 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3130 if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3131 return;
3132 }
3133
3134 LocationSummary* locations =
3135 new (allocator_) LocationSummary(invoke,
3136 LocationSummary::kNoCall,
3137 kIntrinsified);
3138
3139 locations->SetInAt(0, Location::RequiresRegister());
3140 locations->SetInAt(1, Location::RequiresRegister());
3141 locations->SetInAt(2, Location::RequiresRegister());
3142 locations->SetInAt(3, Location::RequiresRegister());
3143 locations->AddTemp(Location::RequiresRegister());
3144 locations->SetOut(Location::RequiresRegister());
3145 }
3146
3147 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3148 //
3149 // There is no need to generate code checking if addr is 0.
3150 // The method updateByteBuffer is a private method of java.util.zip.CRC32.
3151 // This guarantees no calls outside of the CRC32 class.
3152 // The address of a DirectBuffer is always passed to the call of updateByteBuffer.
3153 // An implementation of an empty DirectBuffer may use a zero address, but in
3154 // that case its length must also be zero. The generated code below works
3155 // correctly with a zero length.
3156 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3157 DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3158
3159 MacroAssembler* masm = GetVIXLAssembler();
3160 LocationSummary* locations = invoke->GetLocations();
3161
3162 Register addr = XRegisterFrom(locations->InAt(1));
3163 Register ptr = XRegisterFrom(locations->GetTemp(0));
3164 __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
3165
3166 Register crc = WRegisterFrom(locations->InAt(0));
3167 Register length = WRegisterFrom(locations->InAt(3));
3168 Register out = WRegisterFrom(locations->Out());
3169 GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3170 }
3171
3172 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
3173
3174 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3175 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3176 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3177 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3178 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3179 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
3180 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3181 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3182
3183 // 1.8.
3184 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3185 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3186 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3187 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3188 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3189
3190 UNREACHABLE_INTRINSICS(ARM64)
3191
3192 #undef __
3193
3194 } // namespace arm64
3195 } // namespace art
3196