/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::DRegisterFromS;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` is
  // restricted to DataType::Type::kReference; relaxing that restriction would let it also
  // serve the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` is
  // restricted to DataType::Type::kReference; relaxing that restriction would let it also
  // serve the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

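// For a 64-bit input, the number of leading zeros is CLZ of the high word; when the high word
// is zero, it is CLZ of the low word plus 32.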
static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

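// The number of trailing zeros is computed as CLZ(RBIT(x)): reverse the bits, then count the
// leading zeros of the result.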
static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

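// Branch-free absolute value: `mask` is the sign bit extended to all bits (x >> 31), so
// |x| = (x + mask) ^ mask. The 64-bit path applies the same idea with an add/adc carry chain.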
static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::SRegister op1 = SRegisterFrom(op1_loc);
  vixl32::SRegister op2 = SRegisterFrom(op2_loc);
  vixl32::SRegister out = OutputSRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp1 = temps.Acquire();
  vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label nan, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F32, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
  __ Vmov(temp1, op1);
  __ Vmov(temp2, op2);
  if (is_min) {
    __ Orr(temp1, temp1, temp2);
  } else {
    __ And(temp1, temp1, temp2);
  }
  __ Vmov(out, temp1);
  __ B(final_label);

  // handle NaN input.
  __ Bind(&nan);
  __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
  __ Vmov(out, temp1);

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::DRegister op1 = DRegisterFrom(op1_loc);
  vixl32::DRegister op2 = DRegisterFrom(op2_loc);
  vixl32::DRegister out = OutputDRegister(invoke);
  vixl32::Label handle_nan_eq, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F64, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0).
  if (!is_min) {
    __ Vand(F64, out, op1, op2);
    __ B(final_label);
  }

  // handle op1 == op2, min(+0.0,-0.0), NaN input.
  __ Bind(&handle_nan_eq);
  __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}

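// 64-bit signed min/max: CMP on the low words plus SBCS on the high words sets the GE/LT flags
// for the full 64-bit comparison; an ITT block then conditionally moves op2 into the output.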
static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
  vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
  vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
  vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
  vixl32::Register out_lo = LowRegisterFrom(out_loc);
  vixl32::Register out_hi = HighRegisterFrom(out_loc);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();

  DCHECK(op1_lo.Is(out_lo));
  DCHECK(op1_hi.Is(out_hi));

  // Compare op1 >= op2, or op1 < op2.
  __ Cmp(out_lo, op2_lo);
  __ Sbcs(temp, out_hi, op2_hi);

  // Now GE/LT condition code is correct for the long comparison.
  {
    vixl32::ConditionType cond = is_min ? ge : lt;
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                3 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ itt(cond);
    __ mov(cond, out_lo, op2_lo);
    __ mov(cond, out_hi, op2_hi);
  }
}

static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* far_target */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
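  // If the address register aliases the low half of the output, load the high word first so the
  // address is not clobbered before the second load.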
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

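// Common code for the Unsafe.get* intrinsics: a (possibly volatile) load from `base + offset`,
// with a trailing DMB for volatile accesses and a read barrier for reference loads.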
static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

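  // A DMB before the store gives the put release semantics (covers both volatile and ordered
  // puts); a volatile put additionally needs the trailing DMB emitted below.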
1163 if (is_volatile || is_ordered) {
1164 __ Dmb(vixl32::ISH);
1165 }
1166
1167 if (type == DataType::Type::kInt64) {
1168 vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
1169 vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
1170 value = value_lo;
1171 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
1172 vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
1173 vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
1174 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1175 const vixl32::Register temp_reg = temps.Acquire();
1176
1177 __ Add(temp_reg, base, offset);
1178 vixl32::Label loop_head;
1179 __ Bind(&loop_head);
1180 __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
1181 __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
1182 __ Cmp(temp_lo, 0);
1183 __ B(ne, &loop_head, /* far_target */ false);
1184 } else {
1185 __ Strd(value_lo, value_hi, MemOperand(base, offset));
1186 }
1187 } else {
1188 value = RegisterFrom(locations->InAt(3));
1189 vixl32::Register source = value;
1190 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1191 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1192 __ Mov(temp, value);
1193 assembler->PoisonHeapReference(temp);
1194 source = temp;
1195 }
1196 __ Str(source, MemOperand(base, offset));
1197 }
1198
1199 if (is_volatile) {
1200 __ Dmb(vixl32::ISH);
1201 }
1202
1203 if (type == DataType::Type::kReference) {
1204 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1205 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
1206 bool value_can_be_null = true; // TODO: Worth finding out this information?
1207 codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
1208 }
1209 }
1210
VisitUnsafePut(HInvoke * invoke)1211 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
1212 GenUnsafePut(invoke->GetLocations(),
1213 DataType::Type::kInt32,
1214 /* is_volatile */ false,
1215 /* is_ordered */ false,
1216 codegen_);
1217 }
VisitUnsafePutOrdered(HInvoke * invoke)1218 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
1219 GenUnsafePut(invoke->GetLocations(),
1220 DataType::Type::kInt32,
1221 /* is_volatile */ false,
1222 /* is_ordered */ true,
1223 codegen_);
1224 }
VisitUnsafePutVolatile(HInvoke * invoke)1225 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
1226 GenUnsafePut(invoke->GetLocations(),
1227 DataType::Type::kInt32,
1228 /* is_volatile */ true,
1229 /* is_ordered */ false,
1230 codegen_);
1231 }
VisitUnsafePutObject(HInvoke * invoke)1232 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
1233 GenUnsafePut(invoke->GetLocations(),
1234 DataType::Type::kReference,
1235 /* is_volatile */ false,
1236 /* is_ordered */ false,
1237 codegen_);
1238 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1239 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1240 GenUnsafePut(invoke->GetLocations(),
1241 DataType::Type::kReference,
1242 /* is_volatile */ false,
1243 /* is_ordered */ true,
1244 codegen_);
1245 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1246 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1247 GenUnsafePut(invoke->GetLocations(),
1248 DataType::Type::kReference,
1249 /* is_volatile */ true,
1250 /* is_ordered */ false,
1251 codegen_);
1252 }
VisitUnsafePutLong(HInvoke * invoke)1253 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
1254 GenUnsafePut(invoke->GetLocations(),
1255 DataType::Type::kInt64,
1256 /* is_volatile */ false,
1257 /* is_ordered */ false,
1258 codegen_);
1259 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1260 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1261 GenUnsafePut(invoke->GetLocations(),
1262 DataType::Type::kInt64,
1263 /* is_volatile */ false,
1264 /* is_ordered */ true,
1265 codegen_);
1266 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1267 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1268 GenUnsafePut(invoke->GetLocations(),
1269 DataType::Type::kInt64,
1270 /* is_volatile */ true,
1271 /* is_ordered */ false,
1272 codegen_);
1273 }
1274
1275 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
1276 HInvoke* invoke,
1277 DataType::Type type) {
1278 bool can_call = kEmitCompilerReadBarrier &&
1279 kUseBakerReadBarrier &&
1280 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1281 LocationSummary* locations =
1282 new (allocator) LocationSummary(invoke,
1283 can_call
1284 ? LocationSummary::kCallOnSlowPath
1285 : LocationSummary::kNoCall,
1286 kIntrinsified);
1287 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1288 locations->SetInAt(1, Location::RequiresRegister());
1289 locations->SetInAt(2, Location::RequiresRegister());
1290 locations->SetInAt(3, Location::RequiresRegister());
1291 locations->SetInAt(4, Location::RequiresRegister());
1292
1293 // If heap poisoning is enabled, we don't want the unpoisoning
1294 // operations to potentially clobber the output. Likewise when
1295 // emitting a (Baker) read barrier, which may call.
1296 Location::OutputOverlap overlaps =
1297 ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
1298 ? Location::kOutputOverlap
1299 : Location::kNoOutputOverlap;
1300 locations->SetOut(Location::RequiresRegister(), overlaps);
1301
1302 // Temporary registers used in CAS. In the object case
1303 // (UnsafeCASObject intrinsic), these are also used for
1304 // card-marking, and possibly for (Baker) read barrier.
1305 locations->AddTemp(Location::RequiresRegister()); // Pointer.
1306 locations->AddTemp(Location::RequiresRegister()); // Temp 1.
1307 }
1308
1309 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
1310 DCHECK_NE(type, DataType::Type::kInt64);
1311
1312 ArmVIXLAssembler* assembler = codegen->GetAssembler();
1313 LocationSummary* locations = invoke->GetLocations();
1314
1315 Location out_loc = locations->Out();
1316 vixl32::Register out = OutputRegister(invoke); // Boolean result.
1317
1318 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
1319 Location offset_loc = locations->InAt(2);
1320 vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B).
1321 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
1322 vixl32::Register value = InputRegisterAt(invoke, 4); // Value.
1323
1324 Location tmp_ptr_loc = locations->GetTemp(0);
1325 vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory.
1326 vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory.
1327
1328 if (type == DataType::Type::kReference) {
1329 // The only read barrier implementation supporting the
1330 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1331 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1332
1333 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
1334 // object and scan the receiver at the next GC for nothing.
1335 bool value_can_be_null = true; // TODO: Worth finding out this information?
1336 codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1337
1338 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1339 // Need to make sure the reference stored in the field is a to-space
1340 // one before attempting the CAS or the CAS could fail incorrectly.
1341 codegen->UpdateReferenceFieldWithBakerReadBarrier(
1342 invoke,
1343 out_loc, // Unused, used only as a "temporary" within the read barrier.
1344 base,
1345 /* field_offset */ offset_loc,
1346 tmp_ptr_loc,
1347 /* needs_null_check */ false,
1348 tmp);
1349 }
1350 }
1351
1352 // Prevent reordering with prior memory operations.
1353 // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
1354 // latter allows a preceding load to be delayed past the STXR
1355 // instruction below.
1356 __ Dmb(vixl32::ISH);
1357
1358 __ Add(tmp_ptr, base, offset);
1359
1360 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1361 codegen->GetAssembler()->PoisonHeapReference(expected);
1362 if (value.Is(expected)) {
1363 // Do not poison `value`, as it is the same register as
1364 // `expected`, which has just been poisoned.
1365 } else {
1366 codegen->GetAssembler()->PoisonHeapReference(value);
1367 }
1368 }
1369
1370 // do {
1371 // tmp = [r_ptr] - expected;
1372 // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1373  // result = tmp == 0;
1374
1375 vixl32::Label loop_head;
1376 __ Bind(&loop_head);
1377
1378 __ Ldrex(tmp, MemOperand(tmp_ptr));
1379
1380 __ Subs(tmp, tmp, expected);
1381
1382 {
1383 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1384 3 * kMaxInstructionSizeInBytes,
1385 CodeBufferCheckScope::kMaximumSize);
1386
1387 __ itt(eq);
1388 __ strex(eq, tmp, value, MemOperand(tmp_ptr));
1389 __ cmp(eq, tmp, 1);
1390 }
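  // The IT block above attempts the store only when the loaded value matched `expected`
  // (EQ after the SUBS). In that case `tmp` now holds the STREX status: 0 on success, 1 if
  // the exclusive monitor was lost, in which case the CMP keeps EQ set and we retry below.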
1391
1392 __ B(eq, &loop_head, /* far_target */ false);
1393
1394 __ Dmb(vixl32::ISH);
1395
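  // Compute the boolean result: RSBS sets out = 1 - tmp, which is 1 exactly when the CAS
  // succeeded (tmp == 0). For tmp >= 2 the subtraction borrows (carry clear), so the
  // conditional MOV below forces out to 0; for tmp == 1 the RSBS already produced 0.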
1396 __ Rsbs(out, tmp, 1);
1397
1398 {
1399 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1400 2 * kMaxInstructionSizeInBytes,
1401 CodeBufferCheckScope::kMaximumSize);
1402
1403 __ it(cc);
1404 __ mov(cc, out, 0);
1405 }
1406
1407 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1408 codegen->GetAssembler()->UnpoisonHeapReference(expected);
1409 if (value.Is(expected)) {
1410 // Do not unpoison `value`, as it is the same register as
1411 // `expected`, which has just been unpoisoned.
1412 } else {
1413 codegen->GetAssembler()->UnpoisonHeapReference(value);
1414 }
1415 }
1416 }
1417
1418 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1419 CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32);
1420 }
1421 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1422 // The only read barrier implementation supporting the
1423 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1424 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1425 return;
1426 }
1427
1428 CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference);
1429 }
1430 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1431 GenCas(invoke, DataType::Type::kInt32, codegen_);
1432 }
1433 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1434 // The only read barrier implementation supporting the
1435 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1436 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1437
1438 GenCas(invoke, DataType::Type::kReference, codegen_);
1439 }
1440
1441 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1442 // The inputs plus one temp.
1443 LocationSummary* locations =
1444 new (allocator_) LocationSummary(invoke,
1445 invoke->InputAt(1)->CanBeNull()
1446 ? LocationSummary::kCallOnSlowPath
1447 : LocationSummary::kNoCall,
1448 kIntrinsified);
1449 locations->SetInAt(0, Location::RequiresRegister());
1450 locations->SetInAt(1, Location::RequiresRegister());
1451 locations->AddTemp(Location::RequiresRegister());
1452 locations->AddTemp(Location::RequiresRegister());
1453 locations->AddTemp(Location::RequiresRegister());
1454   // Need an extra temporary register for the String compression feature.
1455 if (mirror::kUseStringCompression) {
1456 locations->AddTemp(Location::RequiresRegister());
1457 }
1458 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1459 }
1460
1461 // Forward declaration.
1462 //
1463 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
1464 // by the compiler for every C++ function, and if this function gets inlined in
1465 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
1466 // build failure. That is the reason why the NO_INLINE attribute is used.
1467 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1468 HInvoke* invoke,
1469 vixl32::Label* end,
1470 vixl32::Label* different_compression);
1471
1472 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1473 ArmVIXLAssembler* assembler = GetAssembler();
1474 LocationSummary* locations = invoke->GetLocations();
1475
1476 const vixl32::Register str = InputRegisterAt(invoke, 0);
1477 const vixl32::Register arg = InputRegisterAt(invoke, 1);
1478 const vixl32::Register out = OutputRegister(invoke);
1479
1480 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1481 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1482 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1483 vixl32::Register temp3;
1484 if (mirror::kUseStringCompression) {
1485 temp3 = RegisterFrom(locations->GetTemp(3));
1486 }
1487
1488 vixl32::Label end;
1489 vixl32::Label different_compression;
1490
1491 // Get offsets of count and value fields within a string object.
1492 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1493
1494 // Note that the null check must have been done earlier.
1495 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1496
1497 // Take slow path and throw if input can be and is null.
1498 SlowPathCodeARMVIXL* slow_path = nullptr;
1499 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1500 if (can_slow_path) {
1501 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1502 codegen_->AddSlowPath(slow_path);
1503 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1504 }
1505
1506 // Reference equality check, return 0 if same reference.
1507 __ Subs(out, str, arg);
1508 __ B(eq, &end);
1509
1510 if (mirror::kUseStringCompression) {
1511 // Load `count` fields of this and argument strings.
1512 __ Ldr(temp3, MemOperand(str, count_offset));
1513 __ Ldr(temp2, MemOperand(arg, count_offset));
1514 // Extract lengths from the `count` fields.
1515 __ Lsr(temp0, temp3, 1u);
1516 __ Lsr(temp1, temp2, 1u);
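    // (With string compression, bit 0 of `count` is the compression flag, so the character
    // length is `count >> 1`.)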
1517 } else {
1518 // Load lengths of this and argument strings.
1519 __ Ldr(temp0, MemOperand(str, count_offset));
1520 __ Ldr(temp1, MemOperand(arg, count_offset));
1521 }
1522 // out = length diff.
1523 __ Subs(out, temp0, temp1);
1524 // temp0 = min(len(str), len(arg)).
1525
1526 {
1527 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1528 2 * kMaxInstructionSizeInBytes,
1529 CodeBufferCheckScope::kMaximumSize);
1530
1531 __ it(gt);
1532 __ mov(gt, temp0, temp1);
1533 }
1534
1535 // Shorter string is empty?
1536   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1537   // which moves the &end label too far away from this branch for it to be 'CBZ-encodable'.
1538 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1539
1540 if (mirror::kUseStringCompression) {
1541     // Check that both strings use the same compression style before using this comparison loop.
1542 __ Eors(temp2, temp2, temp3);
1543 __ Lsrs(temp2, temp2, 1u);
1544 __ B(cs, &different_compression);
1545 // For string compression, calculate the number of bytes to compare (not chars).
1546 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1547 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
1548
1549 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1550 2 * kMaxInstructionSizeInBytes,
1551 CodeBufferCheckScope::kMaximumSize);
1552
1553 __ it(ne);
1554 __ add(ne, temp0, temp0, temp0);
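    // NE here means the compression flag was set, i.e. the strings are uncompressed, so the
    // char count in temp0 is doubled to obtain the byte count; compressed strings already
    // store one byte per character.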
1555 }
1556
1557
1558 GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
1559
1560 __ Bind(&end);
1561
1562 if (can_slow_path) {
1563 __ Bind(slow_path->GetExitLabel());
1564 }
1565 }
1566
1567 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1568 HInvoke* invoke,
1569 vixl32::Label* end,
1570 vixl32::Label* different_compression) {
1571 LocationSummary* locations = invoke->GetLocations();
1572
1573 const vixl32::Register str = InputRegisterAt(invoke, 0);
1574 const vixl32::Register arg = InputRegisterAt(invoke, 1);
1575 const vixl32::Register out = OutputRegister(invoke);
1576
1577 const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1578 const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1579 const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1580 vixl32::Register temp3;
1581 if (mirror::kUseStringCompression) {
1582 temp3 = RegisterFrom(locations->GetTemp(3));
1583 }
1584
1585 vixl32::Label loop;
1586 vixl32::Label find_char_diff;
1587
1588 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1589 // Store offset of string value in preparation for comparison loop.
1590 __ Mov(temp1, value_offset);
1591
1592 // Assertions that must hold in order to compare multiple characters at a time.
1593 CHECK_ALIGNED(value_offset, 8);
1594 static_assert(IsAligned<8>(kObjectAlignment),
1595 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1596
1597 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
1598 DCHECK_EQ(char_size, 2u);
1599
1600 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1601
1602 vixl32::Label find_char_diff_2nd_cmp;
1603 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1604 __ Bind(&loop);
1605 vixl32::Register temp_reg = temps.Acquire();
1606 __ Ldr(temp_reg, MemOperand(str, temp1));
1607 __ Ldr(temp2, MemOperand(arg, temp1));
1608 __ Cmp(temp_reg, temp2);
1609 __ B(ne, &find_char_diff, /* far_target */ false);
1610 __ Add(temp1, temp1, char_size * 2);
1611
1612 __ Ldr(temp_reg, MemOperand(str, temp1));
1613 __ Ldr(temp2, MemOperand(arg, temp1));
1614 __ Cmp(temp_reg, temp2);
1615 __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
1616 __ Add(temp1, temp1, char_size * 2);
1617 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1618 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1619 __ B(hi, &loop, /* far_target */ false);
1620 __ B(end);
1621
1622 __ Bind(&find_char_diff_2nd_cmp);
1623 if (mirror::kUseStringCompression) {
1624 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
1625 __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end?
1626 } else {
1627 // Without string compression, we can start treating temp0 as signed
1628 // and rely on the signed comparison below.
1629 __ Sub(temp0, temp0, 2);
1630 }
1631
1632 // Find the single character difference.
1633 __ Bind(&find_char_diff);
1634 // Get the bit position of the first character that differs.
1635 __ Eor(temp1, temp2, temp_reg);
1636 __ Rbit(temp1, temp1);
1637 __ Clz(temp1, temp1);
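  // (RBIT + CLZ of the XOR computes the number of trailing zero bits, i.e. the bit position
  // of the lowest differing bit between the two words just compared.)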
1638
1639 // temp0 = number of characters remaining to compare.
1640 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1641 // in the comparison loop, and after the end of the shorter string data).
1642
1643 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1644 // two words compared, in the interval [0,1].
1645 // (0 for low half-word different, 1 for high half-word different).
1646 // With string compression, (temp1 << 3) = byte where the difference occurs,
1647 // in the interval [0,3].
1648
1649 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1650 // the remaining string data, so just return length diff (out).
1651 // The comparison is unsigned for string compression, otherwise signed.
1652 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1653 __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false);
1654
1655 // Extract the characters and calculate the difference.
1656 if (mirror::kUseStringCompression) {
1657 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1658 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1659 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1660 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
1661 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
1662 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1663 __ Lsr(temp2, temp2, temp1); // Extract second character.
1664 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1665 __ Lsr(out, temp_reg, temp1); // Extract first character.
1666 __ And(temp2, temp2, temp3);
1667 __ And(out, out, temp3);
1668 } else {
1669 __ Bic(temp1, temp1, 0xf);
1670 __ Lsr(temp2, temp2, temp1);
1671 __ Lsr(out, temp_reg, temp1);
1672 __ Movt(temp2, 0);
1673 __ Movt(out, 0);
1674 }
1675
1676 __ Sub(out, out, temp2);
1677 temps.Release(temp_reg);
1678
1679 if (mirror::kUseStringCompression) {
1680 __ B(end);
1681 __ Bind(different_compression);
1682
1683 // Comparison for different compression style.
1684 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1685 DCHECK_EQ(c_char_size, 1u);
1686
1687 // We want to free up the temp3, currently holding `str.count`, for comparison.
1688   // So, we move it to the bottom bit of the iteration count `temp0` which we then
1689 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1690 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1691 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1692 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
1693 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1694 __ Mov(temp1, str);
1695 __ Mov(temp2, arg);
1696 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1697 {
1698 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1699 3 * kMaxInstructionSizeInBytes,
1700 CodeBufferCheckScope::kMaximumSize);
1701 __ itt(cs); // Interleave with selection of temp1 and temp2.
1702 __ mov(cs, temp1, arg); // Preserves flags.
1703 __ mov(cs, temp2, str); // Preserves flags.
1704 }
1705 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
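  // After the SBC, bit 0 of `temp0` holds the inverted `str` compression flag (the borrow is
  // taken only when `str` is compressed), while the upper bits still hold the doubled
  // character count used as the loop counter.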
1706
1707 // Adjust temp1 and temp2 from string pointers to data pointers.
1708 __ Add(temp1, temp1, value_offset);
1709 __ Add(temp2, temp2, value_offset);
1710
1711 vixl32::Label different_compression_loop;
1712 vixl32::Label different_compression_diff;
1713
1714 // Main loop for different compression.
1715 temp_reg = temps.Acquire();
1716 __ Bind(&different_compression_loop);
1717 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1718 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1719 __ Cmp(temp_reg, temp3);
1720 __ B(ne, &different_compression_diff, /* far_target */ false);
1721 __ Subs(temp0, temp0, 2);
1722 __ B(hi, &different_compression_loop, /* far_target */ false);
1723 __ B(end);
1724
1725 // Calculate the difference.
1726 __ Bind(&different_compression_diff);
1727 __ Sub(out, temp_reg, temp3);
1728 temps.Release(temp_reg);
1729 // Flip the difference if the `arg` is compressed.
1730 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
1731 __ Lsrs(temp0, temp0, 1u);
1732 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1733 "Expecting 0=compressed, 1=uncompressed");
1734
1735 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1736 2 * kMaxInstructionSizeInBytes,
1737 CodeBufferCheckScope::kMaximumSize);
1738 __ it(cc);
1739 __ rsb(cc, out, out, 0);
1740 }
1741 }
1742
1743 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1744 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1745 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1746 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1747 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
1748 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
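// With the 16-byte cutoff, the unrolled comparison below needs at most two LDRD-pair
// iterations (8 bytes each), i.e. 2 * (LDRD+LDRD+CMP+BNE+CMP+BNE) = 12 instructions,
// which matches the budget stated above.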
1749
1750 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1751 if (candidate->IsLoadString()) {
1752 HLoadString* load_string = candidate->AsLoadString();
1753 const DexFile& dex_file = load_string->GetDexFile();
1754 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1755 }
1756 return nullptr;
1757 }
1758
1759 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1760 if (kEmitCompilerReadBarrier &&
1761 !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
1762 !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
1763 // No support for this odd case (String class is moveable, not in the boot image).
1764 return;
1765 }
1766
1767 LocationSummary* locations =
1768 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1769 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1770 locations->SetInAt(0, Location::RequiresRegister());
1771 locations->SetInAt(1, Location::RequiresRegister());
1772
1773 // Temporary registers to store lengths of strings and for calculations.
1774 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1775 locations->AddTemp(LocationFrom(r0));
1776
1777 // For the generic implementation and for long const strings we need an extra temporary.
1778 // We do not need it for short const strings, up to 4 bytes, see code generation below.
1779 uint32_t const_string_length = 0u;
1780 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1781 if (const_string == nullptr) {
1782 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1783 }
1784 bool is_compressed =
1785 mirror::kUseStringCompression &&
1786 const_string != nullptr &&
1787 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1788 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1789 locations->AddTemp(Location::RequiresRegister());
1790 }
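  // (At most 4 bytes means at most 4 compressed or 2 uncompressed characters, which the code
  // generator compares with a single LDR/LDR/CMP and therefore needs no extra temporary.)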
1791
1792 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1793 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1794 // Then we shall need an extra temporary register instead of the output register.
1795 locations->SetOut(Location::RequiresRegister());
1796 }
1797
1798 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1799 ArmVIXLAssembler* assembler = GetAssembler();
1800 LocationSummary* locations = invoke->GetLocations();
1801
1802 vixl32::Register str = InputRegisterAt(invoke, 0);
1803 vixl32::Register arg = InputRegisterAt(invoke, 1);
1804 vixl32::Register out = OutputRegister(invoke);
1805
1806 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1807
1808 vixl32::Label loop;
1809 vixl32::Label end;
1810 vixl32::Label return_true;
1811 vixl32::Label return_false;
1812 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1813
1814 // Get offsets of count, value, and class fields within a string object.
1815 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1816 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1817 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1818
1819 // Note that the null check must have been done earlier.
1820 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1821
1822 StringEqualsOptimizations optimizations(invoke);
1823 if (!optimizations.GetArgumentNotNull()) {
1824 // Check if input is null, return false if it is.
1825 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
1826 }
1827
1828 // Reference equality check, return true if same reference.
1829 __ Cmp(str, arg);
1830 __ B(eq, &return_true, /* far_target */ false);
1831
1832 if (!optimizations.GetArgumentIsString()) {
1833 // Instanceof check for the argument by comparing class fields.
1834 // All string objects must have the same type since String cannot be subclassed.
1835 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1836 // If the argument is a string object, its class field must be equal to receiver's class field.
1837 __ Ldr(temp, MemOperand(str, class_offset));
1838 __ Ldr(out, MemOperand(arg, class_offset));
1839 __ Cmp(temp, out);
1840 __ B(ne, &return_false, /* far_target */ false);
1841 }
1842
1843 // Check if one of the inputs is a const string. Do not special-case both strings
1844   // being const; such cases should be handled by constant folding if needed.
1845 uint32_t const_string_length = 0u;
1846 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1847 if (const_string == nullptr) {
1848 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1849 if (const_string != nullptr) {
1850 std::swap(str, arg); // Make sure the const string is in `str`.
1851 }
1852 }
1853 bool is_compressed =
1854 mirror::kUseStringCompression &&
1855 const_string != nullptr &&
1856 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1857
1858 if (const_string != nullptr) {
1859 // Load `count` field of the argument string and check if it matches the const string.
1860     // This also compares the compression style; if it differs, return false.
1861 __ Ldr(temp, MemOperand(arg, count_offset));
1862 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1863 __ B(ne, &return_false, /* far_target */ false);
1864 } else {
1865 // Load `count` fields of this and argument strings.
1866 __ Ldr(temp, MemOperand(str, count_offset));
1867 __ Ldr(out, MemOperand(arg, count_offset));
1868 // Check if `count` fields are equal, return false if they're not.
1869     // This also compares the compression style; if it differs, return false.
1870 __ Cmp(temp, out);
1871 __ B(ne, &return_false, /* far_target */ false);
1872 }
1873
1874 // Assertions that must hold in order to compare strings 4 bytes at a time.
1875 // Ok to do this because strings are zero-padded to kObjectAlignment.
1876 DCHECK_ALIGNED(value_offset, 4);
1877 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1878
1879 if (const_string != nullptr &&
1880 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1881 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1882 // Load and compare the contents. Though we know the contents of the short const string
1883 // at compile time, materializing constants may be more code than loading from memory.
1884 int32_t offset = value_offset;
1885 size_t remaining_bytes =
1886 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1887 while (remaining_bytes > sizeof(uint32_t)) {
1888 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1889 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1890 vixl32::Register temp2 = scratch_scope.Acquire();
1891 __ Ldrd(temp, temp1, MemOperand(str, offset));
1892 __ Ldrd(temp2, out, MemOperand(arg, offset));
1893 __ Cmp(temp, temp2);
1894       __ B(ne, &return_false, /* far_target */ false);
1895 __ Cmp(temp1, out);
1896       __ B(ne, &return_false, /* far_target */ false);
1897 offset += 2u * sizeof(uint32_t);
1898 remaining_bytes -= 2u * sizeof(uint32_t);
1899 }
1900 if (remaining_bytes != 0u) {
1901 __ Ldr(temp, MemOperand(str, offset));
1902 __ Ldr(out, MemOperand(arg, offset));
1903 __ Cmp(temp, out);
1904       __ B(ne, &return_false, /* far_target */ false);
1905 }
1906 } else {
1907 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1908 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1909 "Expecting 0=compressed, 1=uncompressed");
1910 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
1911
1912 if (mirror::kUseStringCompression) {
1913 // For string compression, calculate the number of bytes to compare (not chars).
1914 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1915 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1916 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1917 2 * kMaxInstructionSizeInBytes,
1918 CodeBufferCheckScope::kMaximumSize);
1919 __ it(cs); // If uncompressed,
1920 __ add(cs, temp, temp, temp); // double the byte count.
1921 }
1922
1923 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1924 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1925 vixl32::Register temp2 = scratch_scope.Acquire();
1926
1927 // Store offset of string value in preparation for comparison loop.
1928 __ Mov(temp1, value_offset);
1929
1930 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1931 __ Bind(&loop);
1932 __ Ldr(out, MemOperand(str, temp1));
1933 __ Ldr(temp2, MemOperand(arg, temp1));
1934 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1935 __ Cmp(out, temp2);
1936 __ B(ne, &return_false, /* far_target */ false);
1937 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1938 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1939 __ B(hi, &loop, /* far_target */ false);
1940 }
1941
1942 // Return true and exit the function.
1943 // If loop does not result in returning false, we return true.
1944 __ Bind(&return_true);
1945 __ Mov(out, 1);
1946 __ B(final_label);
1947
1948 // Return false and exit the function.
1949 __ Bind(&return_false);
1950 __ Mov(out, 0);
1951
1952 if (end.IsReferenced()) {
1953 __ Bind(&end);
1954 }
1955 }
1956
1957 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1958 ArmVIXLAssembler* assembler,
1959 CodeGeneratorARMVIXL* codegen,
1960 bool start_at_zero) {
1961 LocationSummary* locations = invoke->GetLocations();
1962
1963 // Note that the null check must have been done earlier.
1964 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1965
1966 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1967 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1968 SlowPathCodeARMVIXL* slow_path = nullptr;
1969 HInstruction* code_point = invoke->InputAt(1);
1970 if (code_point->IsIntConstant()) {
1971 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1972 std::numeric_limits<uint16_t>::max()) {
1973 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1974 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1975 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1976 codegen->AddSlowPath(slow_path);
1977 __ B(slow_path->GetEntryLabel());
1978 __ Bind(slow_path->GetExitLabel());
1979 return;
1980 }
1981 } else if (code_point->GetType() != DataType::Type::kUint16) {
1982 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1983     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1984 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1985 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1986 codegen->AddSlowPath(slow_path);
1987 __ B(hs, slow_path->GetEntryLabel());
1988 }
1989
1990 if (start_at_zero) {
1991 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1992 DCHECK(tmp_reg.Is(r2));
1993 // Start-index = 0.
1994 __ Mov(tmp_reg, 0);
1995 }
1996
1997 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1998 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1999
2000 if (slow_path != nullptr) {
2001 __ Bind(slow_path->GetExitLabel());
2002 }
2003 }
2004
2005 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
2006 LocationSummary* locations = new (allocator_) LocationSummary(
2007 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2008 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2009 // best to align the inputs accordingly.
2010 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2011 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2012 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2013 locations->SetOut(LocationFrom(r0));
2014
2015 // Need to send start-index=0.
2016 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2017 }
2018
2019 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
2020 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
2021 }
2022
2023 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
2024 LocationSummary* locations = new (allocator_) LocationSummary(
2025 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2026 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2027 // best to align the inputs accordingly.
2028 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2029 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2030 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2031 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2032 locations->SetOut(LocationFrom(r0));
2033 }
2034
2035 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
2036 GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
2037 }
2038
2039 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2040 LocationSummary* locations = new (allocator_) LocationSummary(
2041 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2042 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2043 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2044 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2045 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2046 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
2047 locations->SetOut(LocationFrom(r0));
2048 }
2049
2050 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2051 ArmVIXLAssembler* assembler = GetAssembler();
2052 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
2053 __ Cmp(byte_array, 0);
2054 SlowPathCodeARMVIXL* slow_path =
2055 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2056 codegen_->AddSlowPath(slow_path);
2057 __ B(eq, slow_path->GetEntryLabel());
2058
2059 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
2060 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
2061 __ Bind(slow_path->GetExitLabel());
2062 }
2063
2064 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2065 LocationSummary* locations =
2066 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2067 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2068 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2069 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2070 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2071 locations->SetOut(LocationFrom(r0));
2072 }
2073
2074 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2075 // No need to emit code checking whether `locations->InAt(2)` is a null
2076 // pointer, as callers of the native method
2077 //
2078 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2079 //
2080 // all include a null check on `data` before calling that method.
2081 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2082 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2083 }
2084
2085 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2086 LocationSummary* locations = new (allocator_) LocationSummary(
2087 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2088 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2089 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2090 locations->SetOut(LocationFrom(r0));
2091 }
2092
2093 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2094 ArmVIXLAssembler* assembler = GetAssembler();
2095 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
2096 __ Cmp(string_to_copy, 0);
2097 SlowPathCodeARMVIXL* slow_path =
2098 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2099 codegen_->AddSlowPath(slow_path);
2100 __ B(eq, slow_path->GetEntryLabel());
2101
2102 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2103 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2104
2105 __ Bind(slow_path->GetExitLabel());
2106 }
2107
2108 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2109 // The only read barrier implementation supporting the
2110 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2111 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2112 return;
2113 }
2114
2115 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2116 LocationSummary* locations = invoke->GetLocations();
2117 if (locations == nullptr) {
2118 return;
2119 }
2120
2121 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2122 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2123 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2124
2125 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
2126 locations->SetInAt(1, Location::RequiresRegister());
2127 }
2128 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
2129 locations->SetInAt(3, Location::RequiresRegister());
2130 }
2131 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
2132 locations->SetInAt(4, Location::RequiresRegister());
2133 }
2134 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2135 // Temporary register IP cannot be used in
2136     // ReadBarrierSystemArrayCopySlowPathARMVIXL (because that register
2137 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
2138 // temporary register from the register allocator.
2139 locations->AddTemp(Location::RequiresRegister());
2140 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
2141 arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
2142 }
2143 }
2144
2145 static void CheckPosition(ArmVIXLAssembler* assembler,
2146 Location pos,
2147 vixl32::Register input,
2148 Location length,
2149 SlowPathCodeARMVIXL* slow_path,
2150 vixl32::Register temp,
2151 bool length_is_input_length = false) {
2152 // Where is the length in the Array?
2153 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
2154
2155 if (pos.IsConstant()) {
2156 int32_t pos_const = Int32ConstantFrom(pos);
2157 if (pos_const == 0) {
2158 if (!length_is_input_length) {
2159 // Check that length(input) >= length.
2160 __ Ldr(temp, MemOperand(input, length_offset));
2161 if (length.IsConstant()) {
2162 __ Cmp(temp, Int32ConstantFrom(length));
2163 } else {
2164 __ Cmp(temp, RegisterFrom(length));
2165 }
2166 __ B(lt, slow_path->GetEntryLabel());
2167 }
2168 } else {
2169 // Check that length(input) >= pos.
2170 __ Ldr(temp, MemOperand(input, length_offset));
2171 __ Subs(temp, temp, pos_const);
2172 __ B(lt, slow_path->GetEntryLabel());
2173
2174 // Check that (length(input) - pos) >= length.
2175 if (length.IsConstant()) {
2176 __ Cmp(temp, Int32ConstantFrom(length));
2177 } else {
2178 __ Cmp(temp, RegisterFrom(length));
2179 }
2180 __ B(lt, slow_path->GetEntryLabel());
2181 }
2182 } else if (length_is_input_length) {
2183 // The only way the copy can succeed is if pos is zero.
2184 vixl32::Register pos_reg = RegisterFrom(pos);
2185 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
2186 } else {
2187 // Check that pos >= 0.
2188 vixl32::Register pos_reg = RegisterFrom(pos);
2189 __ Cmp(pos_reg, 0);
2190 __ B(lt, slow_path->GetEntryLabel());
2191
2192 // Check that pos <= length(input).
2193 __ Ldr(temp, MemOperand(input, length_offset));
2194 __ Subs(temp, temp, pos_reg);
2195 __ B(lt, slow_path->GetEntryLabel());
2196
2197 // Check that (length(input) - pos) >= length.
2198 if (length.IsConstant()) {
2199 __ Cmp(temp, Int32ConstantFrom(length));
2200 } else {
2201 __ Cmp(temp, RegisterFrom(length));
2202 }
2203 __ B(lt, slow_path->GetEntryLabel());
2204 }
2205 }
2206
2207 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2208 // The only read barrier implementation supporting the
2209 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2210 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2211
2212 ArmVIXLAssembler* assembler = GetAssembler();
2213 LocationSummary* locations = invoke->GetLocations();
2214
2215 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2216 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2217 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2218 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2219 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2220
2221 vixl32::Register src = InputRegisterAt(invoke, 0);
2222 Location src_pos = locations->InAt(1);
2223 vixl32::Register dest = InputRegisterAt(invoke, 2);
2224 Location dest_pos = locations->InAt(3);
2225 Location length = locations->InAt(4);
2226 Location temp1_loc = locations->GetTemp(0);
2227 vixl32::Register temp1 = RegisterFrom(temp1_loc);
2228 Location temp2_loc = locations->GetTemp(1);
2229 vixl32::Register temp2 = RegisterFrom(temp2_loc);
2230 Location temp3_loc = locations->GetTemp(2);
2231 vixl32::Register temp3 = RegisterFrom(temp3_loc);
2232
2233 SlowPathCodeARMVIXL* intrinsic_slow_path =
2234 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2235 codegen_->AddSlowPath(intrinsic_slow_path);
2236
2237 vixl32::Label conditions_on_positions_validated;
2238 SystemArrayCopyOptimizations optimizations(invoke);
2239
2240 // If source and destination are the same, we go to slow path if we need to do
2241 // forward copying.
2242 if (src_pos.IsConstant()) {
2243 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2244 if (dest_pos.IsConstant()) {
2245 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2246 if (optimizations.GetDestinationIsSource()) {
2247 // Checked when building locations.
2248 DCHECK_GE(src_pos_constant, dest_pos_constant);
2249 } else if (src_pos_constant < dest_pos_constant) {
2250 __ Cmp(src, dest);
2251 __ B(eq, intrinsic_slow_path->GetEntryLabel());
2252 }
2253
2254 // Checked when building locations.
2255 DCHECK(!optimizations.GetDestinationIsSource()
2256 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2257 } else {
2258 if (!optimizations.GetDestinationIsSource()) {
2259 __ Cmp(src, dest);
2260 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2261 }
2262 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2263 __ B(gt, intrinsic_slow_path->GetEntryLabel());
2264 }
2265 } else {
2266 if (!optimizations.GetDestinationIsSource()) {
2267 __ Cmp(src, dest);
2268 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2269 }
2270 if (dest_pos.IsConstant()) {
2271 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2272 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2273 } else {
2274 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2275 }
2276 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2277 }
2278
2279 __ Bind(&conditions_on_positions_validated);
2280
2281 if (!optimizations.GetSourceIsNotNull()) {
2282 // Bail out if the source is null.
2283 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
2284 }
2285
2286 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2287 // Bail out if the destination is null.
2288 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
2289 }
2290
2291 // If the length is negative, bail out.
2292 // We have already checked in the LocationsBuilder for the constant case.
2293 if (!length.IsConstant() &&
2294 !optimizations.GetCountIsSourceLength() &&
2295 !optimizations.GetCountIsDestinationLength()) {
2296 __ Cmp(RegisterFrom(length), 0);
2297 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2298 }
2299
2300 // Validity checks: source.
2301 CheckPosition(assembler,
2302 src_pos,
2303 src,
2304 length,
2305 intrinsic_slow_path,
2306 temp1,
2307 optimizations.GetCountIsSourceLength());
2308
2309 // Validity checks: dest.
2310 CheckPosition(assembler,
2311 dest_pos,
2312 dest,
2313 length,
2314 intrinsic_slow_path,
2315 temp1,
2316 optimizations.GetCountIsDestinationLength());
2317
2318 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2319 // Check whether all elements of the source array are assignable to the component
2320 // type of the destination array. We do two checks: the classes are the same,
2321 // or the destination is Object[]. If none of these checks succeed, we go to the
2322 // slow path.
2323
2324 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2325 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2326 // /* HeapReference<Class> */ temp1 = src->klass_
2327 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2328 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2329 // Bail out if the source is not a non primitive array.
2330 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2331 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2332 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2333 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2334 // If heap poisoning is enabled, `temp1` has been unpoisoned
2335       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2336 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2337 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2338 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2339 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2340 }
2341
2342 // /* HeapReference<Class> */ temp1 = dest->klass_
2343 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2344 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2345
2346 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2347 // Bail out if the destination is not a non primitive array.
2348 //
2349 // Register `temp1` is not trashed by the read barrier emitted
2350 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2351 // method produces a call to a ReadBarrierMarkRegX entry point,
2352 // which saves all potentially live registers, including
2353       // temporaries such as `temp1`.
2354 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2355 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2356 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
2357 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2358 // If heap poisoning is enabled, `temp2` has been unpoisoned
2359       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2360 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2361 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2362 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2363 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2364 }
2365
2366 // For the same reason given earlier, `temp1` is not trashed by the
2367 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2368 // /* HeapReference<Class> */ temp2 = src->klass_
2369 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2370 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2371 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2372 __ Cmp(temp1, temp2);
2373
2374 if (optimizations.GetDestinationIsTypedObjectArray()) {
2375 vixl32::Label do_copy;
2376 __ B(eq, &do_copy, /* far_target */ false);
2377 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2378 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2379 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2380 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2381 // We do not need to emit a read barrier for the following
2382 // heap reference load, as `temp1` is only used in a
2383 // comparison with null below, and this reference is not
2384 // kept afterwards.
2385 __ Ldr(temp1, MemOperand(temp1, super_offset));
2386 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2387 __ Bind(&do_copy);
2388 } else {
2389 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2390 }
2391 } else {
2392 // Non read barrier code.
2393
2394 // /* HeapReference<Class> */ temp1 = dest->klass_
2395 __ Ldr(temp1, MemOperand(dest, class_offset));
2396 // /* HeapReference<Class> */ temp2 = src->klass_
2397 __ Ldr(temp2, MemOperand(src, class_offset));
2398 bool did_unpoison = false;
2399 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2400 !optimizations.GetSourceIsNonPrimitiveArray()) {
2401 // One or two of the references need to be unpoisoned. Unpoison them
2402 // both to make the identity check valid.
2403 assembler->MaybeUnpoisonHeapReference(temp1);
2404 assembler->MaybeUnpoisonHeapReference(temp2);
2405 did_unpoison = true;
2406 }
2407
2408 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2409 // Bail out if the destination is not a non primitive array.
2410 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2411 __ Ldr(temp3, MemOperand(temp1, component_offset));
2412 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2413 assembler->MaybeUnpoisonHeapReference(temp3);
2414 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2415 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2416 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2417 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2418 }
2419
2420 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2421 // Bail out if the source is not a non primitive array.
2422 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2423 __ Ldr(temp3, MemOperand(temp2, component_offset));
2424 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2425 assembler->MaybeUnpoisonHeapReference(temp3);
2426 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2427 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2428 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2429 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2430 }
2431
2432 __ Cmp(temp1, temp2);
2433
2434 if (optimizations.GetDestinationIsTypedObjectArray()) {
2435 vixl32::Label do_copy;
2436 __ B(eq, &do_copy, /* far_target */ false);
2437 if (!did_unpoison) {
2438 assembler->MaybeUnpoisonHeapReference(temp1);
2439 }
2440 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2441 __ Ldr(temp1, MemOperand(temp1, component_offset));
2442 assembler->MaybeUnpoisonHeapReference(temp1);
2443 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2444 __ Ldr(temp1, MemOperand(temp1, super_offset));
2445 // No need to unpoison the result, we're comparing against null.
2446 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2447 __ Bind(&do_copy);
2448 } else {
2449 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2450 }
2451 }
2452 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2453 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2454 // Bail out if the source is not a non primitive array.
2455 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2456 // /* HeapReference<Class> */ temp1 = src->klass_
2457 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2458 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2459 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2460 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2461 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2462 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2463 // If heap poisoning is enabled, `temp3` has been unpoisoned
2464       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2465 } else {
2466 // /* HeapReference<Class> */ temp1 = src->klass_
2467 __ Ldr(temp1, MemOperand(src, class_offset));
2468 assembler->MaybeUnpoisonHeapReference(temp1);
2469 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2470 __ Ldr(temp3, MemOperand(temp1, component_offset));
2471 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2472 assembler->MaybeUnpoisonHeapReference(temp3);
2473 }
2474 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2475 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2476 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2477 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2478 }
2479
2480 if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2481     // Zero constant length: no need to emit the loop code at all.
2482 } else {
2483 vixl32::Label done;
2484 const DataType::Type type = DataType::Type::kReference;
2485 const int32_t element_size = DataType::Size(type);
2486
2487 if (length.IsRegister()) {
2488       // Don't enter the copy loop if the length is zero.
2489 __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
2490 }
2491
2492 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2493 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2494
2495 // SystemArrayCopy implementation for Baker read barriers (see
2496 // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2497 //
2498 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2499 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2500 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2501 // if (is_gray) {
2502 // // Slow-path copy.
2503 // do {
2504 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2505 // } while (src_ptr != end_ptr)
2506 // } else {
2507 // // Fast-path copy.
2508 // do {
2509 // *dest_ptr++ = *src_ptr++;
2510 // } while (src_ptr != end_ptr)
2511 // }
2512
2513 // /* int32_t */ monitor = src->monitor_
2514 __ Ldr(temp2, MemOperand(src, monitor_offset));
2515 // /* LockWord */ lock_word = LockWord(monitor)
2516 static_assert(sizeof(LockWord) == sizeof(int32_t),
2517 "art::LockWord and int32_t have different sizes.");
2518
2519 // Introduce a dependency on the lock_word including the rb_state,
2520 // which shall prevent load-load reordering without using
2521 // a memory barrier (which would be more expensive).
2522 // `src` is unchanged by this operation, but its value now depends
2523 // on `temp2`.
2524 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
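      // (An immediate LSR by #32 always produces 0, so the ADD above adds zero to `src`;
      // its only effect is the register dependency on `temp2`.)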
2525
2526 // Compute the base source address in `temp1`.
2527 // Note that `temp1` (the base source address) is computed from
2528 // `src` (and `src_pos`) here, and thus honors the artificial
2529 // dependency of `src` on `temp2`.
2530 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2531 // Compute the end source address in `temp3`.
2532 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2533 // The base destination address is computed later, as `temp2` is
2534 // used for intermediate computations.
2535
2536 // Slow path used to copy array when `src` is gray.
2537 // Note that the base destination address is computed in `temp2`
2538 // by the slow path code.
2539 SlowPathCodeARMVIXL* read_barrier_slow_path =
2540 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2541 codegen_->AddSlowPath(read_barrier_slow_path);
2542
2543 // Given the numeric representation, it's enough to check the low bit of the
2544 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2545 // which can be a 16-bit instruction unlike the TST immediate.
2546 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2547 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2548 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2549 // Carry flag is the last bit shifted out by LSRS.
2550 __ B(cs, read_barrier_slow_path->GetEntryLabel());
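      // In effect (sketch): if (((lock_word >> LockWord::kReadBarrierStateShift) & 1) ==
      //                         ReadBarrier::GrayState()) goto read_barrier_slow_path;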
2551
2552 // Fast-path copy.
2553 // Compute the base destination address in `temp2`.
2554 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2555 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2556 // poison/unpoison.
2557 vixl32::Label loop;
2558 __ Bind(&loop);
2559 {
2560 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2561 const vixl32::Register temp_reg = temps.Acquire();
2562 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2563 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2564 }
2565 __ Cmp(temp1, temp3);
2566 __ B(ne, &loop, /* far_target */ false);
2567
2568 __ Bind(read_barrier_slow_path->GetExitLabel());
2569 } else {
2570 // Non read barrier code.
2571 // Compute the base source address in `temp1`.
2572 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2573 // Compute the base destination address in `temp2`.
2574 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2575 // Compute the end source address in `temp3`.
2576 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2577 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2578 // poison/unpoison.
2579 vixl32::Label loop;
2580 __ Bind(&loop);
2581 {
2582 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2583 const vixl32::Register temp_reg = temps.Acquire();
2584 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2585 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2586 }
2587 __ Cmp(temp1, temp3);
2588 __ B(ne, &loop, /* far_target */ false);
2589 }
2590 __ Bind(&done);
2591 }
2592
2593 // We only need one card marking on the destination array.
2594 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2595
2596 __ Bind(intrinsic_slow_path->GetExitLabel());
2597 }
2598
2599 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2600 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2601 // the code generator. Furthermore, the register allocator creates fixed live intervals
2602 // for all caller-saved registers because we are doing a function call. As a result, if
2603 // the input and output locations are unallocated, the register allocator runs out of
2604 // registers and fails; however, a debuggable graph is not the common case.
2605 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2606 return;
2607 }
2608
2609 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2610 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2611 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2612
2613 LocationSummary* const locations =
2614 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2615 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2616
2617 locations->SetInAt(0, Location::RequiresFpuRegister());
2618 locations->SetOut(Location::RequiresFpuRegister());
2619 // Native code uses the soft float ABI.
2620 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2621 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
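  // (With the soft-float ABI a double argument and result live in a pair of core registers,
  // hence the two core-register temps above.)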
2622 }
2623
2624 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2625 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2626 // the code generator. Furthermore, the register allocator creates fixed live intervals
2627 // for all caller-saved registers because we are doing a function call. As a result, if
2628 // the input and output locations are unallocated, the register allocator runs out of
2629 // registers and fails; however, a debuggable graph is not the common case.
2630 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2631 return;
2632 }
2633
2634 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2635 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2636 DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2637 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2638
2639 LocationSummary* const locations =
2640 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2641 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2642
2643 locations->SetInAt(0, Location::RequiresFpuRegister());
2644 locations->SetInAt(1, Location::RequiresFpuRegister());
2645 locations->SetOut(Location::RequiresFpuRegister());
2646 // Native code uses the soft float ABI.
2647 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2648 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2649 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2650 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2651 }
2652
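// Shared helper for the one-argument FP-to-FP runtime calls below (sin, cos, ...): it moves the
// double input from its D register into the two core-register temps required by the soft-float
// calling convention, invokes the quick entrypoint, and moves the result back into the output
// D register.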
2653 static void GenFPToFPCall(HInvoke* invoke,
2654 ArmVIXLAssembler* assembler,
2655 CodeGeneratorARMVIXL* codegen,
2656 QuickEntrypointEnum entry) {
2657 LocationSummary* const locations = invoke->GetLocations();
2658
2659 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2660 DCHECK(locations->WillCall() && locations->Intrinsified());
2661
2662 // Native code uses the soft float ABI.
2663 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2664 RegisterFrom(locations->GetTemp(1)),
2665 InputDRegisterAt(invoke, 0));
2666 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2667 __ Vmov(OutputDRegister(invoke),
2668 RegisterFrom(locations->GetTemp(0)),
2669 RegisterFrom(locations->GetTemp(1)));
2670 }
2671
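// Two-argument variant of the helper above: the second double argument is additionally moved
// into the third and fourth core-register temps before the call.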
2672 static void GenFPFPToFPCall(HInvoke* invoke,
2673 ArmVIXLAssembler* assembler,
2674 CodeGeneratorARMVIXL* codegen,
2675 QuickEntrypointEnum entry) {
2676 LocationSummary* const locations = invoke->GetLocations();
2677
2678 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2679 DCHECK(locations->WillCall() && locations->Intrinsified());
2680
2681 // Native code uses the soft float ABI.
2682 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2683 RegisterFrom(locations->GetTemp(1)),
2684 InputDRegisterAt(invoke, 0));
2685 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2686 RegisterFrom(locations->GetTemp(3)),
2687 InputDRegisterAt(invoke, 1));
2688 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2689 __ Vmov(OutputDRegister(invoke),
2690 RegisterFrom(locations->GetTemp(0)),
2691 RegisterFrom(locations->GetTemp(1)));
2692 }
2693
2694 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2695 CreateFPToFPCallLocations(allocator_, invoke);
2696 }
2697
2698 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2699 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2700 }
2701
2702 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2703 CreateFPToFPCallLocations(allocator_, invoke);
2704 }
2705
2706 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2707 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2708 }
2709
2710 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2711 CreateFPToFPCallLocations(allocator_, invoke);
2712 }
2713
2714 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2715 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2716 }
2717
2718 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2719 CreateFPToFPCallLocations(allocator_, invoke);
2720 }
2721
2722 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2723 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2724 }
2725
2726 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2727 CreateFPToFPCallLocations(allocator_, invoke);
2728 }
2729
2730 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2731 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2732 }
2733
2734 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2735 CreateFPToFPCallLocations(allocator_, invoke);
2736 }
2737
2738 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2739 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2740 }
2741
2742 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2743 CreateFPToFPCallLocations(allocator_, invoke);
2744 }
2745
2746 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2747 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2748 }
2749
2750 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2751 CreateFPToFPCallLocations(allocator_, invoke);
2752 }
2753
2754 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2755 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2756 }
2757
2758 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2759 CreateFPToFPCallLocations(allocator_, invoke);
2760 }
2761
2762 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2763 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2764 }
2765
2766 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2767 CreateFPToFPCallLocations(allocator_, invoke);
2768 }
2769
2770 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2771 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2772 }
2773
2774 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2775 CreateFPToFPCallLocations(allocator_, invoke);
2776 }
2777
2778 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2779 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2780 }
2781
2782 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2783 CreateFPToFPCallLocations(allocator_, invoke);
2784 }
2785
2786 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2787 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2788 }
2789
2790 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2791 CreateFPToFPCallLocations(allocator_, invoke);
2792 }
2793
2794 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2795 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2796 }
2797
2798 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2799 CreateFPToFPCallLocations(allocator_, invoke);
2800 }
2801
2802 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2803 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2804 }
2805
2806 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2807 CreateFPFPToFPCallLocations(allocator_, invoke);
2808 }
2809
2810 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2811 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2812 }
2813
2814 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
2815 CreateFPFPToFPCallLocations(allocator_, invoke);
2816 }
2817
2818 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
2819 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
2820 }
2821
2822 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2823 CreateFPFPToFPCallLocations(allocator_, invoke);
2824 }
2825
2826 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2827 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2828 }
2829
2830 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2831 CreateFPFPToFPCallLocations(allocator_, invoke);
2832 }
2833
2834 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2835 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2836 }
2837
2838 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2839 CreateIntToIntLocations(allocator_, invoke);
2840 }
2841
2842 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2843 ArmVIXLAssembler* assembler = GetAssembler();
2844 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2845 }
2846
2847 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2848 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2849 }
2850
2851 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2852 ArmVIXLAssembler* assembler = GetAssembler();
2853 LocationSummary* locations = invoke->GetLocations();
2854
2855 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2856 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2857 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2858 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2859
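  // A 64-bit bit reversal is the bit reversal of each 32-bit half with the halves swapped.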
2860 __ Rbit(out_reg_lo, in_reg_hi);
2861 __ Rbit(out_reg_hi, in_reg_lo);
2862 }
2863
2864 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2865 CreateIntToIntLocations(allocator_, invoke);
2866 }
2867
2868 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2869 ArmVIXLAssembler* assembler = GetAssembler();
2870 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2871 }
2872
2873 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2874 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2875 }
2876
2877 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2878 ArmVIXLAssembler* assembler = GetAssembler();
2879 LocationSummary* locations = invoke->GetLocations();
2880
2881 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2882 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2883 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2884 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2885
2886 __ Rev(out_reg_lo, in_reg_hi);
2887 __ Rev(out_reg_hi, in_reg_lo);
2888 }
2889
2890 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2891 CreateIntToIntLocations(allocator_, invoke);
2892 }
2893
2894 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2895 ArmVIXLAssembler* assembler = GetAssembler();
2896 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2897 }
2898
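// Generates Integer.bitCount()/Long.bitCount(): the input is moved into a NEON D register,
// per-byte population counts are computed with VCNT and then summed with pairwise VPADDL
// additions, and the 32-bit result is moved back to a core register.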
2899 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2900 DCHECK(DataType::IsIntOrLongType(type)) << type;
2901 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2902 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2903
2904 bool is_long = type == DataType::Type::kInt64;
2905 LocationSummary* locations = instr->GetLocations();
2906 Location in = locations->InAt(0);
2907 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2908 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2909 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2910 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2911 vixl32::Register out_r = OutputRegister(instr);
2912
2913 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2914 // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a core
2915 // register to the upper or lower half of a VFP D-reg incurs extra latency compared to transferring
2916 // to the full D-reg. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2917 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2918 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2919 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2920 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2921 if (is_long) {
2922 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2923 }
2924 __ Vmov(out_r, tmp_s);
2925 }
2926
2927 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2928 CreateIntToIntLocations(allocator_, invoke);
2929 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2930 }
2931
2932 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2933 GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2934 }
2935
2936 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2937 VisitIntegerBitCount(invoke);
2938 }
2939
2940 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2941 GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2942 }
2943
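// Generates {Integer,Long}.highestOneBit(). Sketch of the 32-bit case: the code computes
// 0x80000000 >> CLZ(x) with a register-controlled shift, which yields 0 for x == 0 (CLZ returns
// 32, and a register-controlled shift by 32 or more produces 0). In the 64-bit case both words
// are handled this way and the low-word result is discarded when the high word is non-zero.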
2944 static void GenHighestOneBit(HInvoke* invoke,
2945 DataType::Type type,
2946 CodeGeneratorARMVIXL* codegen) {
2947 DCHECK(DataType::IsIntOrLongType(type));
2948
2949 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2950 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2951 const vixl32::Register temp = temps.Acquire();
2952
2953 if (type == DataType::Type::kInt64) {
2954 LocationSummary* locations = invoke->GetLocations();
2955 Location in = locations->InAt(0);
2956 Location out = locations->Out();
2957
2958 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2959 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2960 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2961 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2962
2963 __ Mov(temp, 0x80000000); // Modified immediate.
2964 __ Clz(out_reg_lo, in_reg_lo);
2965 __ Clz(out_reg_hi, in_reg_hi);
2966 __ Lsr(out_reg_lo, temp, out_reg_lo);
2967 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2968
2969 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2970 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2971 // we check that the output is in a low register, so that a 16-bit MOV
2972 // encoding can be used. If output is in a high register, then we generate
2973 // 4 more bytes of code to avoid a branch.
2974 Operand mov_src(0);
2975 if (!out_reg_lo.IsLow()) {
2976 __ Mov(LeaveFlags, temp, 0);
2977 mov_src = Operand(temp);
2978 }
2979 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2980 2 * vixl32::k16BitT32InstructionSizeInBytes,
2981 CodeBufferCheckScope::kExactSize);
2982 __ it(ne);
2983 __ mov(ne, out_reg_lo, mov_src);
2984 } else {
2985 vixl32::Register out = OutputRegister(invoke);
2986 vixl32::Register in = InputRegisterAt(invoke, 0);
2987
2988 __ Mov(temp, 0x80000000); // Modified immediate.
2989 __ Clz(out, in);
2990 __ Lsr(out, temp, out);
2991 }
2992 }
2993
2994 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2995 CreateIntToIntLocations(allocator_, invoke);
2996 }
2997
2998 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2999 GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
3000 }
3001
3002 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
3003 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
3004 }
3005
3006 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
3007 GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
3008 }
3009
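// Generates {Integer,Long}.lowestOneBit(), i.e. x & -x. In the 64-bit case each word is negated
// and ANDed with itself separately; the high-word result is then cleared when the low word is
// non-zero, because the borrow of the 64-bit negation only reaches the high word when the low
// word is zero.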
3010 static void GenLowestOneBit(HInvoke* invoke,
3011 DataType::Type type,
3012 CodeGeneratorARMVIXL* codegen) {
3013 DCHECK(DataType::IsIntOrLongType(type));
3014
3015 ArmVIXLAssembler* assembler = codegen->GetAssembler();
3016 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3017 const vixl32::Register temp = temps.Acquire();
3018
3019 if (type == DataType::Type::kInt64) {
3020 LocationSummary* locations = invoke->GetLocations();
3021 Location in = locations->InAt(0);
3022 Location out = locations->Out();
3023
3024 vixl32::Register in_reg_lo = LowRegisterFrom(in);
3025 vixl32::Register in_reg_hi = HighRegisterFrom(in);
3026 vixl32::Register out_reg_lo = LowRegisterFrom(out);
3027 vixl32::Register out_reg_hi = HighRegisterFrom(out);
3028
3029 __ Rsb(out_reg_hi, in_reg_hi, 0);
3030 __ Rsb(out_reg_lo, in_reg_lo, 0);
3031 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
3032 // The result of this operation is 0 iff in_reg_lo is 0
3033 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
3034
3035 // Discard result for highest 32 bits if lowest 32 bits are not zero.
3036 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
3037 // we check that the output is in a low register, so that a 16-bit MOV
3038 // encoding can be used. If output is in a high register, then we generate
3039 // 4 more bytes of code to avoid a branch.
3040 Operand mov_src(0);
3041 if (!out_reg_lo.IsLow()) {
3042 __ Mov(LeaveFlags, temp, 0);
3043 mov_src = Operand(temp);
3044 }
3045 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
3046 2 * vixl32::k16BitT32InstructionSizeInBytes,
3047 CodeBufferCheckScope::kExactSize);
3048 __ it(ne);
3049 __ mov(ne, out_reg_hi, mov_src);
3050 } else {
3051 vixl32::Register out = OutputRegister(invoke);
3052 vixl32::Register in = InputRegisterAt(invoke, 0);
3053
3054 __ Rsb(temp, in, 0);
3055 __ And(out, temp, in);
3056 }
3057 }
3058
3059 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3060 CreateIntToIntLocations(allocator_, invoke);
3061 }
3062
3063 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3064 GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
3065 }
3066
3067 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3068 CreateLongToLongLocationsWithOverlap(allocator_, invoke);
3069 }
3070
3071 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3072 GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
3073 }
3074
3075 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3076 LocationSummary* locations =
3077 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3078 locations->SetInAt(0, Location::RequiresRegister());
3079 locations->SetInAt(1, Location::RequiresRegister());
3080 locations->SetInAt(2, Location::RequiresRegister());
3081 locations->SetInAt(3, Location::RequiresRegister());
3082 locations->SetInAt(4, Location::RequiresRegister());
3083
3084 // Temporary registers to store lengths of strings and for calculations.
3085 locations->AddTemp(Location::RequiresRegister());
3086 locations->AddTemp(Location::RequiresRegister());
3087 locations->AddTemp(Location::RequiresRegister());
3088 }
3089
3090 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3091 ArmVIXLAssembler* assembler = GetAssembler();
3092 LocationSummary* locations = invoke->GetLocations();
3093
3094 // Check assumption that sizeof(Char) is 2 (used in scaling below).
3095 const size_t char_size = DataType::Size(DataType::Type::kUint16);
3096 DCHECK_EQ(char_size, 2u);
3097
3098 // Location of data in char array buffer.
3099 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
3100
3101 // Location of char array data in string.
3102 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
3103
3104 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
3105 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
3106 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
3107 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
3108 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
3109 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
3110 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
3111
3112 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
3113 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
3114 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
3115
3116 vixl32::Label done, compressed_string_loop;
3117 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
3118 // dst to be copied.
3119 __ Add(dst_ptr, dstObj, data_offset);
3120 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
3121
3122 __ Subs(num_chr, srcEnd, srcBegin);
3123 // Early out for valid zero-length retrievals.
3124 __ B(eq, final_label, /* far_target */ false);
3125
3126 // src range to copy.
3127 __ Add(src_ptr, srcObj, value_offset);
3128
3129 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3130 vixl32::Register temp;
3131 vixl32::Label compressed_string_preloop;
3132 if (mirror::kUseStringCompression) {
3133 // Location of count in string.
3134 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
3135 temp = temps.Acquire();
3136 // String's length.
3137 __ Ldr(temp, MemOperand(srcObj, count_offset));
3138 __ Tst(temp, 1);
3139 temps.Release(temp);
3140 __ B(eq, &compressed_string_preloop, /* far_target */ false);
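    // (The low bit of the count field is the compression flag: a clear bit means the string is
    // stored as 8-bit characters, so the branch above goes to the compressed copy path.)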
3141 }
3142 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
3143
3144 // Do the copy.
3145 vixl32::Label loop, remainder;
3146
3147 temp = temps.Acquire();
3148 // Subtract into a temp so we don't have to repair the value of num_chr on the < 4 character path.
3149 __ Subs(temp, num_chr, 4);
3150 __ B(lt, &remainder, /* far_target */ false);
3151
3152 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
3153 __ Mov(num_chr, temp);
3154
3155 // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
3156 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
3157 // to rectify these everywhere this intrinsic applies.)
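  // Each iteration below copies four characters (8 bytes) as two 32-bit words: the word at
  // offset 4 first, then the word at offset 0 with a post-increment of 8 on both pointers.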
3158 __ Bind(&loop);
3159 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
3160 __ Subs(num_chr, num_chr, 4);
3161 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
3162 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
3163 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
3164 temps.Release(temp);
3165 __ B(ge, &loop, /* far_target */ false);
3166
3167 __ Adds(num_chr, num_chr, 4);
3168 __ B(eq, final_label, /* far_target */ false);
3169
3170 // Main loop for < 4 character case and remainder handling. Loads and stores one
3171 // 16-bit Java character at a time.
3172 __ Bind(&remainder);
3173 temp = temps.Acquire();
3174 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
3175 __ Subs(num_chr, num_chr, 1);
3176 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3177 temps.Release(temp);
3178 __ B(gt, &remainder, /* far_target */ false);
3179
3180 if (mirror::kUseStringCompression) {
3181 __ B(final_label);
3182
3183 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3184 DCHECK_EQ(c_char_size, 1u);
3185 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
3186 __ Bind(&compressed_string_preloop);
3187 __ Add(src_ptr, src_ptr, srcBegin);
3188 __ Bind(&compressed_string_loop);
3189 temp = temps.Acquire();
3190 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
3191 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3192 temps.Release(temp);
3193 __ Subs(num_chr, num_chr, 1);
3194 __ B(gt, &compressed_string_loop, /* far_target */ false);
3195 }
3196
3197 if (done.IsReferenced()) {
3198 __ Bind(&done);
3199 }
3200 }
3201
3202 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3203 CreateFPToIntLocations(allocator_, invoke);
3204 }
3205
3206 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3207 ArmVIXLAssembler* const assembler = GetAssembler();
3208 const vixl32::Register out = OutputRegister(invoke);
3209 // Shifting left by 1 bit makes the value encodable as an immediate operand;
3210 // we don't care about the sign bit anyway.
3211 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
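  // Sketch of the check: isInfinite(f) <=> ((bits(f) << 1) ^ (kPositiveInfinityFloat << 1)) == 0,
  // which holds for both +Inf and -Inf because the shift discards the sign bit.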
3212
3213 __ Vmov(out, InputSRegisterAt(invoke, 0));
3214 // We don't care about the sign bit, so shift left.
3215 __ Lsl(out, out, 1);
3216 __ Eor(out, out, infinity);
3217 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3218 }
3219
3220 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3221 CreateFPToIntLocations(allocator_, invoke);
3222 }
3223
3224 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3225 ArmVIXLAssembler* const assembler = GetAssembler();
3226 const vixl32::Register out = OutputRegister(invoke);
3227 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3228 const vixl32::Register temp = temps.Acquire();
3229 // The highest 32 bits of double precision positive infinity, separated into
3230 // two constants encodable as immediate operands.
3231 constexpr uint32_t infinity_high = 0x7f000000U;
3232 constexpr uint32_t infinity_high2 = 0x00f00000U;
3233
3234 static_assert((infinity_high | infinity_high2) ==
3235 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
3236 "The constants do not add up to the high 32 bits of double "
3237 "precision positive infinity.");
3238 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
3239 __ Eor(out, out, infinity_high);
3240 __ Eor(out, out, infinity_high2);
3241 // We don't care about the sign bit, so shift left.
3242 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
3243 codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3244 }
3245
3246 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
3247 if (features_.HasARMv8AInstructions()) {
3248 CreateFPToFPLocations(allocator_, invoke);
3249 }
3250 }
3251
3252 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
3253 ArmVIXLAssembler* assembler = GetAssembler();
3254 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
3255 __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3256 }
3257
3258 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
3259 if (features_.HasARMv8AInstructions()) {
3260 CreateFPToFPLocations(allocator_, invoke);
3261 }
3262 }
3263
3264 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
3265 ArmVIXLAssembler* assembler = GetAssembler();
3266 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
3267 __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3268 }
3269
3270 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3271 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3272 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3273 invoke,
3274 codegen_,
3275 LocationFrom(r0),
3276 LocationFrom(calling_convention.GetRegisterAt(0)));
3277 }
3278
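// Implements Integer.valueOf(int). Constant inputs inside the IntegerCache range embed the
// boot-image Integer directly; constant inputs outside it allocate a new java.lang.Integer
// through the runtime. Non-constant inputs check the bounds at run time, loading from the cache
// array on a hit and allocating on a miss.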
3279 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3280 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3281 LocationSummary* locations = invoke->GetLocations();
3282 ArmVIXLAssembler* const assembler = GetAssembler();
3283
3284 vixl32::Register out = RegisterFrom(locations->Out());
3285 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3286 vixl32::Register temp = temps.Acquire();
3287 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3288 vixl32::Register argument = calling_convention.GetRegisterAt(0);
3289 if (invoke->InputAt(0)->IsConstant()) {
3290 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3291 if (value >= info.low && value <= info.high) {
3292 // Just embed the j.l.Integer in the code.
3293 ScopedObjectAccess soa(Thread::Current());
3294 mirror::Object* boxed = info.cache->Get(value + (-info.low));
3295 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3296 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3297 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
3298 } else {
3299 // Allocate and initialize a new j.l.Integer.
3300 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3301 // JIT object table.
3302 uint32_t address =
3303 dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3304 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3305 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3306 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3307 __ Mov(temp, value);
3308 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3309 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3310 // one.
3311 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3312 }
3313 } else {
3314 vixl32::Register in = RegisterFrom(locations->InAt(0));
3315 // Check bounds of our cache.
3316 __ Add(out, in, -info.low);
3317 __ Cmp(out, info.high - info.low + 1);
3318 vixl32::Label allocate, done;
3319 __ B(hs, &allocate, /* is_far_target */ false);
3320 // If the value is within the bounds, load the j.l.Integer directly from the array.
3321 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3322 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3323 __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3324 codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
3325 assembler->MaybeUnpoisonHeapReference(out);
3326 __ B(&done);
3327 __ Bind(&allocate);
3328 // Otherwise allocate and initialize a new j.l.Integer.
3329 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3330 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3331 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3332 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3333 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3334 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3335 // one.
3336 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3337 __ Bind(&done);
3338 }
3339 }
3340
3341 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3342 LocationSummary* locations =
3343 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3344 locations->SetOut(Location::RequiresRegister());
3345 }
3346
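// Implements Thread.interrupted(): loads the thread-local interrupted flag and, if it is set,
// clears it with a DMB before and after the store so the clear is properly ordered with the
// surrounding accesses.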
3347 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3348 ArmVIXLAssembler* assembler = GetAssembler();
3349 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
3350 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
3351 __ Ldr(out, MemOperand(tr, offset));
3352 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3353 vixl32::Register temp = temps.Acquire();
3354 vixl32::Label done;
3355 vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
3356 __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
3357 __ Dmb(vixl32::ISH);
3358 __ Mov(temp, 0);
3359 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3360 __ Dmb(vixl32::ISH);
3361 if (done.IsReferenced()) {
3362 __ Bind(&done);
3363 }
3364 }
3365
3366 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
3367 LocationSummary* locations =
3368 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3369 locations->SetInAt(0, Location::Any());
3370 }
3371
3372 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3373
3374 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
3375 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
3376 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3377 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3378
3379 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3380 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3381 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3382 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3383 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3384 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
3385 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3386 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3387
3388 // 1.8.
3389 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3390 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3391 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3392 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3393 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3394
3395 UNREACHABLE_INTRINSICS(ARMVIXL)
3396
3397 #undef __
3398
3399 } // namespace arm
3400 } // namespace art
3401