/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->
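// Within this file `__ Foo(...)` expands, via the macro above, to
// assembler->GetVIXLAssembler()->Foo(...), i.e. it emits through the VIXL AArch32
// macro-assembler owned by the local `assembler`.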

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::DRegisterFromS;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}
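// In both branches the computed address is base = array + data_offset + pos * element_size;
// the constant-position case simply folds everything into one immediate ADD.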

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}
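// Likewise, end = base + copy_length * element_size, i.e. one past the last element to copy.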

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}
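// For the 64-bit case: if the high word is non-zero the result is CLZ(hi), otherwise it is
// 32 + CLZ(lo). For example, 0x0000000080000000 has hi == 0 and CLZ(lo) == 0, giving 32.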

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}
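// RBIT reverses the bit order, so counting trailing zeros reduces to CLZ of the reversed
// value, e.g. CTZ(0x00000008) == CLZ(0x10000000) == 3. The 64-bit case mirrors the CLZ
// helper above, starting from the low word.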

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}
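// Branch-free abs: mask = in >> 31 is all ones for negative inputs and zero otherwise, so
// (in + mask) ^ mask yields in for in >= 0 and -in for in < 0. The 64-bit variant carries
// the add across both words with ADDS/ADC.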

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::SRegister op1 = SRegisterFrom(op1_loc);
  vixl32::SRegister op2 = SRegisterFrom(op2_loc);
  vixl32::SRegister out = OutputSRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp1 = temps.Acquire();
  vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label nan, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F32, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
  __ Vmov(temp1, op1);
  __ Vmov(temp2, op2);
  if (is_min) {
    __ Orr(temp1, temp1, temp2);
  } else {
    __ And(temp1, temp1, temp2);
  }
  __ Vmov(out, temp1);
  __ B(final_label);

  // handle NaN input.
  __ Bind(&nan);
  __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
  __ Vmov(out, temp1);

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}
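// The equal-inputs path only matters for signed zeros: ORing the raw bits keeps a sign bit
// if either operand is -0.0 (so min(+0.0, -0.0) == -0.0), while ANDing clears it unless both
// operands are -0.0 (so max(+0.0, -0.0) == +0.0).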

static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in.
    return;
  }

  vixl32::DRegister op1 = DRegisterFrom(op1_loc);
  vixl32::DRegister op2 = DRegisterFrom(op2_loc);
  vixl32::DRegister out = OutputDRegister(invoke);
  vixl32::Label handle_nan_eq, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F64, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0).
  if (!is_min) {
    __ Vand(F64, out, op1, op2);
    __ B(final_label);
  }

  // handle op1 == op2, min(+0.0,-0.0), NaN input.
  __ Bind(&handle_nan_eq);
  __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
  vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
  vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
  vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
  vixl32::Register out_lo = LowRegisterFrom(out_loc);
  vixl32::Register out_hi = HighRegisterFrom(out_loc);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();

  DCHECK(op1_lo.Is(out_lo));
  DCHECK(op1_hi.Is(out_hi));

  // Compare op1 >= op2, or op1 < op2.
  __ Cmp(out_lo, op2_lo);
  __ Sbcs(temp, out_hi, op2_hi);

  // Now GE/LT condition code is correct for the long comparison.
  {
    vixl32::ConditionType cond = is_min ? ge : lt;
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                3 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ itt(cond);
    __ mov(cond, out_lo, op2_lo);
    __ mov(cond, out_hi, op2_hi);
  }
}
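// CMP on the low words followed by SBCS on the high words performs a full 64-bit signed
// subtraction of op2 from op1 (discarding the result), so GE/LT reflect the 64-bit signed
// comparison and the ITT block conditionally copies op2 into the output pair.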

static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* far_target */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}
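// Example of the negative-tie fix-up: for an input of -2.5f, VCVTA (ties away from zero)
// produces -3, but Math.round rounds half up, so the IT/ADD corrects the result to -2.
// For -2.3f the VRINTA difference is not 0.5, no correction is applied, and -2 stands.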

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}
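// For volatile gets the pattern is a plain load followed by DMB ISH, which roughly gives the
// acquire ordering a Java volatile read needs; the 64-bit path switches to LDREXD when LDRD
// is not single-copy atomic on the target CPU.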

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke);
}
1151 
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,bool is_ordered,CodeGeneratorARMVIXL * codegen)1152 static void GenUnsafePut(LocationSummary* locations,
1153                          DataType::Type type,
1154                          bool is_volatile,
1155                          bool is_ordered,
1156                          CodeGeneratorARMVIXL* codegen) {
1157   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1158 
1159   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
1160   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
1161   vixl32::Register value;
1162 
1163   if (is_volatile || is_ordered) {
1164     __ Dmb(vixl32::ISH);
1165   }
1166 
1167   if (type == DataType::Type::kInt64) {
1168     vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
1169     vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
1170     value = value_lo;
1171     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
1172       vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
1173       vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
1174       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1175       const vixl32::Register temp_reg = temps.Acquire();
1176 
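           // Without single-copy atomic LDRD/STRD, emit an LDREXD/STREXD loop: STREXD writes 0 to
           // its status register (`temp_lo` here) only if the exclusive monitor set by the
           // preceding LDREXD is still held, which guarantees the 64-bit store happens atomically.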
1177       __ Add(temp_reg, base, offset);
1178       vixl32::Label loop_head;
1179       __ Bind(&loop_head);
1180       __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
1181       __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
1182       __ Cmp(temp_lo, 0);
1183       __ B(ne, &loop_head, /* far_target */ false);
1184     } else {
1185       __ Strd(value_lo, value_hi, MemOperand(base, offset));
1186     }
1187   } else {
1188     value = RegisterFrom(locations->InAt(3));
1189     vixl32::Register source = value;
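         // With heap poisoning, references are stored in poisoned form. Poison a copy in a temp
         // rather than `value` itself, since the unpoisoned `value` is still needed for the card
         // mark below.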
1190     if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1191       vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1192       __ Mov(temp, value);
1193       assembler->PoisonHeapReference(temp);
1194       source = temp;
1195     }
1196     __ Str(source, MemOperand(base, offset));
1197   }
1198 
1199   if (is_volatile) {
1200     __ Dmb(vixl32::ISH);
1201   }
1202 
1203   if (type == DataType::Type::kReference) {
1204     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1205     vixl32::Register card = RegisterFrom(locations->GetTemp(1));
1206     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1207     codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
1208   }
1209 }
1210 
1211 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
1212   GenUnsafePut(invoke->GetLocations(),
1213                DataType::Type::kInt32,
1214                /* is_volatile */ false,
1215                /* is_ordered */ false,
1216                codegen_);
1217 }
1218 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
1219   GenUnsafePut(invoke->GetLocations(),
1220                DataType::Type::kInt32,
1221                /* is_volatile */ false,
1222                /* is_ordered */ true,
1223                codegen_);
1224 }
1225 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
1226   GenUnsafePut(invoke->GetLocations(),
1227                DataType::Type::kInt32,
1228                /* is_volatile */ true,
1229                /* is_ordered */ false,
1230                codegen_);
1231 }
1232 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
1233   GenUnsafePut(invoke->GetLocations(),
1234                DataType::Type::kReference,
1235                /* is_volatile */ false,
1236                /* is_ordered */ false,
1237                codegen_);
1238 }
1239 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1240   GenUnsafePut(invoke->GetLocations(),
1241                DataType::Type::kReference,
1242                /* is_volatile */ false,
1243                /* is_ordered */ true,
1244                codegen_);
1245 }
1246 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1247   GenUnsafePut(invoke->GetLocations(),
1248                DataType::Type::kReference,
1249                /* is_volatile */ true,
1250                /* is_ordered */ false,
1251                codegen_);
1252 }
1253 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
1254   GenUnsafePut(invoke->GetLocations(),
1255                DataType::Type::kInt64,
1256                /* is_volatile */ false,
1257                /* is_ordered */ false,
1258                codegen_);
1259 }
1260 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1261   GenUnsafePut(invoke->GetLocations(),
1262                DataType::Type::kInt64,
1263                /* is_volatile */ false,
1264                /* is_ordered */ true,
1265                codegen_);
1266 }
1267 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1268   GenUnsafePut(invoke->GetLocations(),
1269                DataType::Type::kInt64,
1270                /* is_volatile */ true,
1271                /* is_ordered */ false,
1272                codegen_);
1273 }
1274 
1275 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
1276                                                 HInvoke* invoke,
1277                                                 DataType::Type type) {
1278   bool can_call = kEmitCompilerReadBarrier &&
1279       kUseBakerReadBarrier &&
1280       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1281   LocationSummary* locations =
1282       new (allocator) LocationSummary(invoke,
1283                                       can_call
1284                                           ? LocationSummary::kCallOnSlowPath
1285                                           : LocationSummary::kNoCall,
1286                                       kIntrinsified);
1287   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1288   locations->SetInAt(1, Location::RequiresRegister());
1289   locations->SetInAt(2, Location::RequiresRegister());
1290   locations->SetInAt(3, Location::RequiresRegister());
1291   locations->SetInAt(4, Location::RequiresRegister());
1292 
1293   // If heap poisoning is enabled, we don't want the unpoisoning
1294   // operations to potentially clobber the output. Likewise when
1295   // emitting a (Baker) read barrier, which may call.
1296   Location::OutputOverlap overlaps =
1297       ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
1298       ? Location::kOutputOverlap
1299       : Location::kNoOutputOverlap;
1300   locations->SetOut(Location::RequiresRegister(), overlaps);
1301 
1302   // Temporary registers used in CAS. In the object case
1303   // (UnsafeCASObject intrinsic), these are also used for
1304   // card-marking, and possibly for (Baker) read barrier.
1305   locations->AddTemp(Location::RequiresRegister());  // Pointer.
1306   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
1307 }
1308 
1309 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
1310   DCHECK_NE(type, DataType::Type::kInt64);
1311 
1312   ArmVIXLAssembler* assembler = codegen->GetAssembler();
1313   LocationSummary* locations = invoke->GetLocations();
1314 
1315   Location out_loc = locations->Out();
1316   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
1317 
1318   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
1319   Location offset_loc = locations->InAt(2);
1320   vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
1321   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
1322   vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
1323 
1324   Location tmp_ptr_loc = locations->GetTemp(0);
1325   vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
1326   vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
1327 
1328   if (type == DataType::Type::kReference) {
1329     // The only read barrier implementation supporting the
1330     // UnsafeCASObject intrinsic is the Baker-style read barriers.
1331     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1332 
1333     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
1334     // object and scan the receiver at the next GC for nothing.
1335     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1336     codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1337 
1338     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1339       // Need to make sure the reference stored in the field is a to-space
1340       // one before attempting the CAS or the CAS could fail incorrectly.
1341       codegen->UpdateReferenceFieldWithBakerReadBarrier(
1342           invoke,
1343           out_loc,  // Unused, used only as a "temporary" within the read barrier.
1344           base,
1345           /* field_offset */ offset_loc,
1346           tmp_ptr_loc,
1347           /* needs_null_check */ false,
1348           tmp);
1349     }
1350   }
1351 
1352   // Prevent reordering with prior memory operations.
1353   // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
1354   // latter allows a preceding load to be delayed past the STXR
1355   // instruction below.
1356   __ Dmb(vixl32::ISH);
1357 
1358   __ Add(tmp_ptr, base, offset);
1359 
1360   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1361     codegen->GetAssembler()->PoisonHeapReference(expected);
1362     if (value.Is(expected)) {
1363       // Do not poison `value`, as it is the same register as
1364       // `expected`, which has just been poisoned.
1365     } else {
1366       codegen->GetAssembler()->PoisonHeapReference(value);
1367     }
1368   }
1369 
1370   // do {
1371   //   tmp = [r_ptr] - expected;
1372   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1373   // result = tmp != 0;
1374 
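       // STREX writes 0 to its status register (`tmp`) on success and 1 on failure, so the
       // conditional CMP below loops back only while the exclusive store keeps failing. If the
       // SUBS finds a mismatch, the IT block is skipped and `tmp` keeps the non-zero difference.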
1375   vixl32::Label loop_head;
1376   __ Bind(&loop_head);
1377 
1378   __ Ldrex(tmp, MemOperand(tmp_ptr));
1379 
1380   __ Subs(tmp, tmp, expected);
1381 
1382   {
1383     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1384                            3 * kMaxInstructionSizeInBytes,
1385                            CodeBufferCheckScope::kMaximumSize);
1386 
1387     __ itt(eq);
1388     __ strex(eq, tmp, value, MemOperand(tmp_ptr));
1389     __ cmp(eq, tmp, 1);
1390   }
1391 
1392   __ B(eq, &loop_head, /* far_target */ false);
1393 
1394   __ Dmb(vixl32::ISH);
1395 
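       // Convert `tmp` to the boolean result: RSBS computes out = 1 - tmp, leaving the carry flag
       // clear only if the subtraction borrows (tmp > 1); the conditional MOV below then zeroes
       // `out`, so out == 1 exactly when tmp == 0.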
1396   __ Rsbs(out, tmp, 1);
1397 
1398   {
1399     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1400                            2 * kMaxInstructionSizeInBytes,
1401                            CodeBufferCheckScope::kMaximumSize);
1402 
1403     __ it(cc);
1404     __ mov(cc, out, 0);
1405   }
1406 
1407   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1408     codegen->GetAssembler()->UnpoisonHeapReference(expected);
1409     if (value.Is(expected)) {
1410       // Do not unpoison `value`, as it is the same register as
1411       // `expected`, which has just been unpoisoned.
1412     } else {
1413       codegen->GetAssembler()->UnpoisonHeapReference(value);
1414     }
1415   }
1416 }
1417 
1418 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1419   CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32);
1420 }
1421 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1422   // The only read barrier implementation supporting the
1423   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1424   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1425     return;
1426   }
1427 
1428   CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference);
1429 }
1430 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1431   GenCas(invoke, DataType::Type::kInt32, codegen_);
1432 }
1433 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1434   // The only read barrier implementation supporting the
1435   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1436   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1437 
1438   GenCas(invoke, DataType::Type::kReference, codegen_);
1439 }
1440 
1441 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1442   // The inputs plus one temp.
1443   LocationSummary* locations =
1444       new (allocator_) LocationSummary(invoke,
1445                                        invoke->InputAt(1)->CanBeNull()
1446                                            ? LocationSummary::kCallOnSlowPath
1447                                            : LocationSummary::kNoCall,
1448                                        kIntrinsified);
1449   locations->SetInAt(0, Location::RequiresRegister());
1450   locations->SetInAt(1, Location::RequiresRegister());
1451   locations->AddTemp(Location::RequiresRegister());
1452   locations->AddTemp(Location::RequiresRegister());
1453   locations->AddTemp(Location::RequiresRegister());
1454   // Need an extra temporary register for the String compression feature.
1455   if (mirror::kUseStringCompression) {
1456     locations->AddTemp(Location::RequiresRegister());
1457   }
1458   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1459 }
1460 
1461 // Forward declaration.
1462 //
1463 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
1464 // by the compiler for every C++ function, and if this function gets inlined in
1465 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
1466 // build failure. That is the reason why NO_INLINE attribute is used.
1467 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1468                                                   HInvoke* invoke,
1469                                                   vixl32::Label* end,
1470                                                   vixl32::Label* different_compression);
1471 
1472 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1473   ArmVIXLAssembler* assembler = GetAssembler();
1474   LocationSummary* locations = invoke->GetLocations();
1475 
1476   const vixl32::Register str = InputRegisterAt(invoke, 0);
1477   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1478   const vixl32::Register out = OutputRegister(invoke);
1479 
1480   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1481   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1482   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1483   vixl32::Register temp3;
1484   if (mirror::kUseStringCompression) {
1485     temp3 = RegisterFrom(locations->GetTemp(3));
1486   }
1487 
1488   vixl32::Label end;
1489   vixl32::Label different_compression;
1490 
1491   // Get offsets of count and value fields within a string object.
1492   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1493 
1494   // Note that the null check must have been done earlier.
1495   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1496 
1497   // Take slow path and throw if input can be and is null.
1498   SlowPathCodeARMVIXL* slow_path = nullptr;
1499   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1500   if (can_slow_path) {
1501     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1502     codegen_->AddSlowPath(slow_path);
1503     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1504   }
1505 
1506   // Reference equality check, return 0 if same reference.
1507   __ Subs(out, str, arg);
1508   __ B(eq, &end);
1509 
1510   if (mirror::kUseStringCompression) {
1511     // Load `count` fields of this and argument strings.
1512     __ Ldr(temp3, MemOperand(str, count_offset));
1513     __ Ldr(temp2, MemOperand(arg, count_offset));
1514     // Extract lengths from the `count` fields.
1515     __ Lsr(temp0, temp3, 1u);
1516     __ Lsr(temp1, temp2, 1u);
1517   } else {
1518     // Load lengths of this and argument strings.
1519     __ Ldr(temp0, MemOperand(str, count_offset));
1520     __ Ldr(temp1, MemOperand(arg, count_offset));
1521   }
1522   // out = length diff.
1523   __ Subs(out, temp0, temp1);
1524   // temp0 = min(len(str), len(arg)).
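       // (The SUBS above set the flags for temp0 - temp1, so the IT/MOV below copies temp1 into
       // temp0 only when temp0 > temp1.)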
1525 
1526   {
1527     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1528                            2 * kMaxInstructionSizeInBytes,
1529                            CodeBufferCheckScope::kMaximumSize);
1530 
1531     __ it(gt);
1532     __ mov(gt, temp0, temp1);
1533   }
1534 
1535   // Shorter string is empty?
1536   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1537   // which makes the &end label far away from this branch and thus not 'CBZ-encodable'.
1538   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1539 
1540   if (mirror::kUseStringCompression) {
1541     // Check that both strings use the same compression style before using this comparison loop.
1542     __ Eors(temp2, temp2, temp3);
1543     __ Lsrs(temp2, temp2, 1u);
1544     __ B(cs, &different_compression);
1545     // For string compression, calculate the number of bytes to compare (not chars).
1546     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1547     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
1548 
1549     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1550                            2 * kMaxInstructionSizeInBytes,
1551                            CodeBufferCheckScope::kMaximumSize);
1552 
1553     __ it(ne);
1554     __ add(ne, temp0, temp0, temp0);
1555   }
1556 
1557 
1558   GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
1559 
1560   __ Bind(&end);
1561 
1562   if (can_slow_path) {
1563     __ Bind(slow_path->GetExitLabel());
1564   }
1565 }
1566 
1567 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1568                                         HInvoke* invoke,
1569                                         vixl32::Label* end,
1570                                         vixl32::Label* different_compression) {
1571   LocationSummary* locations = invoke->GetLocations();
1572 
1573   const vixl32::Register str = InputRegisterAt(invoke, 0);
1574   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1575   const vixl32::Register out = OutputRegister(invoke);
1576 
1577   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1578   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1579   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1580   vixl32::Register temp3;
1581   if (mirror::kUseStringCompression) {
1582     temp3 = RegisterFrom(locations->GetTemp(3));
1583   }
1584 
1585   vixl32::Label loop;
1586   vixl32::Label find_char_diff;
1587 
1588   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1589   // Store offset of string value in preparation for comparison loop.
1590   __ Mov(temp1, value_offset);
1591 
1592   // Assertions that must hold in order to compare multiple characters at a time.
1593   CHECK_ALIGNED(value_offset, 8);
1594   static_assert(IsAligned<8>(kObjectAlignment),
1595                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1596 
1597   const unsigned char_size = DataType::Size(DataType::Type::kUint16);
1598   DCHECK_EQ(char_size, 2u);
1599 
1600   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1601 
1602   vixl32::Label find_char_diff_2nd_cmp;
1603   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1604   __ Bind(&loop);
1605   vixl32::Register temp_reg = temps.Acquire();
1606   __ Ldr(temp_reg, MemOperand(str, temp1));
1607   __ Ldr(temp2, MemOperand(arg, temp1));
1608   __ Cmp(temp_reg, temp2);
1609   __ B(ne, &find_char_diff, /* far_target */ false);
1610   __ Add(temp1, temp1, char_size * 2);
1611 
1612   __ Ldr(temp_reg, MemOperand(str, temp1));
1613   __ Ldr(temp2, MemOperand(arg, temp1));
1614   __ Cmp(temp_reg, temp2);
1615   __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
1616   __ Add(temp1, temp1, char_size * 2);
1617   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1618   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1619   __ B(hi, &loop, /* far_target */ false);
1620   __ B(end);
1621 
1622   __ Bind(&find_char_diff_2nd_cmp);
1623   if (mirror::kUseStringCompression) {
1624     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
1625     __ B(ls, end, /* far_target */ false);  // Was the second comparison fully beyond the end?
1626   } else {
1627     // Without string compression, we can start treating temp0 as signed
1628     // and rely on the signed comparison below.
1629     __ Sub(temp0, temp0, 2);
1630   }
1631 
1632   // Find the single character difference.
1633   __ Bind(&find_char_diff);
1634   // Get the bit position of the first character that differs.
1635   __ Eor(temp1, temp2, temp_reg);
1636   __ Rbit(temp1, temp1);
1637   __ Clz(temp1, temp1);
1638 
1639   // temp0 = number of characters remaining to compare.
1640   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1641   // in the comparison loop, and after the end of the shorter string data).
1642 
1643   // Without string compression, (temp1 >> 4) = character where the difference occurs between the last
1644   // two words compared, in the interval [0,1].
1645   // (0 for low half-word different, 1 for high half-word different).
1646   // With string compression, (temp1 >> 3) = byte where the difference occurs,
1647   // in the interval [0,3].
1648 
1649   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1650   // the remaining string data, so just return length diff (out).
1651   // The comparison is unsigned for string compression, otherwise signed.
1652   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1653   __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false);
1654 
1655   // Extract the characters and calculate the difference.
1656   if (mirror::kUseStringCompression) {
1657     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1658     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1659     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1660     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
1661     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1662     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
1663     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
1664     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
1665     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
1666     __ And(temp2, temp2, temp3);
1667     __ And(out, out, temp3);
1668   } else {
1669     __ Bic(temp1, temp1, 0xf);
1670     __ Lsr(temp2, temp2, temp1);
1671     __ Lsr(out, temp_reg, temp1);
1672     __ Movt(temp2, 0);
1673     __ Movt(out, 0);
1674   }
1675 
1676   __ Sub(out, out, temp2);
1677   temps.Release(temp_reg);
1678 
1679   if (mirror::kUseStringCompression) {
1680     __ B(end);
1681     __ Bind(different_compression);
1682 
1683     // Comparison for different compression style.
1684     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1685     DCHECK_EQ(c_char_size, 1u);
1686 
1687     // We want to free up `temp3`, currently holding `str.count`, for comparison.
1688     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1689     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1690     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1691     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1692     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
1693     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1694     __ Mov(temp1, str);
1695     __ Mov(temp2, arg);
1696     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
1697     {
1698       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1699                              3 * kMaxInstructionSizeInBytes,
1700                              CodeBufferCheckScope::kMaximumSize);
1701       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
1702       __ mov(cs, temp1, arg);                 // Preserves flags.
1703       __ mov(cs, temp2, str);                 // Preserves flags.
1704     }
1705     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
1706 
1707     // Adjust temp1 and temp2 from string pointers to data pointers.
1708     __ Add(temp1, temp1, value_offset);
1709     __ Add(temp2, temp2, value_offset);
1710 
1711     vixl32::Label different_compression_loop;
1712     vixl32::Label different_compression_diff;
1713 
1714     // Main loop for different compression.
1715     temp_reg = temps.Acquire();
1716     __ Bind(&different_compression_loop);
1717     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1718     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1719     __ Cmp(temp_reg, temp3);
1720     __ B(ne, &different_compression_diff, /* far_target */ false);
1721     __ Subs(temp0, temp0, 2);
1722     __ B(hi, &different_compression_loop, /* far_target */ false);
1723     __ B(end);
1724 
1725     // Calculate the difference.
1726     __ Bind(&different_compression_diff);
1727     __ Sub(out, temp_reg, temp3);
1728     temps.Release(temp_reg);
1729     // Flip the difference if the `arg` is compressed.
1730     // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
1731     __ Lsrs(temp0, temp0, 1u);
1732     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1733                   "Expecting 0=compressed, 1=uncompressed");
1734 
1735     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1736                            2 * kMaxInstructionSizeInBytes,
1737                            CodeBufferCheckScope::kMaximumSize);
1738     __ it(cc);
1739     __ rsb(cc, out, out, 0);
1740   }
1741 }
1742 
1743 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
1744 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1745 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1746 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1747 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
1748 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
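     // For example, an aligned 16-byte constant (16 compressed or 8 uncompressed chars) is handled
     // below by two LDRD+LDRD+CMP+BNE+CMP+BNE groups, i.e. 12 instructions, matching that budget.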
1749 
1750 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1751   if (candidate->IsLoadString()) {
1752     HLoadString* load_string = candidate->AsLoadString();
1753     const DexFile& dex_file = load_string->GetDexFile();
1754     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1755   }
1756   return nullptr;
1757 }
1758 
1759 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1760   if (kEmitCompilerReadBarrier &&
1761       !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
1762       !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
1763     // No support for this odd case (String class is moveable, not in the boot image).
1764     return;
1765   }
1766 
1767   LocationSummary* locations =
1768       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1769   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1770   locations->SetInAt(0, Location::RequiresRegister());
1771   locations->SetInAt(1, Location::RequiresRegister());
1772 
1773   // Temporary registers to store lengths of strings and for calculations.
1774   // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1775   locations->AddTemp(LocationFrom(r0));
1776 
1777   // For the generic implementation and for long const strings we need an extra temporary.
1778   // We do not need it for short const strings, up to 4 bytes, see code generation below.
1779   uint32_t const_string_length = 0u;
1780   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1781   if (const_string == nullptr) {
1782     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1783   }
1784   bool is_compressed =
1785       mirror::kUseStringCompression &&
1786       const_string != nullptr &&
1787       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1788   if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1789     locations->AddTemp(Location::RequiresRegister());
1790   }
1791 
1792   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1793   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1794   // Then we shall need an extra temporary register instead of the output register.
1795   locations->SetOut(Location::RequiresRegister());
1796 }
1797 
1798 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1799   ArmVIXLAssembler* assembler = GetAssembler();
1800   LocationSummary* locations = invoke->GetLocations();
1801 
1802   vixl32::Register str = InputRegisterAt(invoke, 0);
1803   vixl32::Register arg = InputRegisterAt(invoke, 1);
1804   vixl32::Register out = OutputRegister(invoke);
1805 
1806   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1807 
1808   vixl32::Label loop;
1809   vixl32::Label end;
1810   vixl32::Label return_true;
1811   vixl32::Label return_false;
1812   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1813 
1814   // Get offsets of count, value, and class fields within a string object.
1815   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1816   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1817   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1818 
1819   // Note that the null check must have been done earlier.
1820   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1821 
1822   StringEqualsOptimizations optimizations(invoke);
1823   if (!optimizations.GetArgumentNotNull()) {
1824     // Check if input is null, return false if it is.
1825     __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
1826   }
1827 
1828   // Reference equality check, return true if same reference.
1829   __ Cmp(str, arg);
1830   __ B(eq, &return_true, /* far_target */ false);
1831 
1832   if (!optimizations.GetArgumentIsString()) {
1833     // Instanceof check for the argument by comparing class fields.
1834     // All string objects must have the same type since String cannot be subclassed.
1835     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1836     // If the argument is a string object, its class field must be equal to receiver's class field.
1837     __ Ldr(temp, MemOperand(str, class_offset));
1838     __ Ldr(out, MemOperand(arg, class_offset));
1839     __ Cmp(temp, out);
1840     __ B(ne, &return_false, /* far_target */ false);
1841   }
1842 
1843   // Check if one of the inputs is a const string. Do not special-case both strings
1844   // being const, such cases should be handled by constant folding if needed.
1845   uint32_t const_string_length = 0u;
1846   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1847   if (const_string == nullptr) {
1848     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1849     if (const_string != nullptr) {
1850       std::swap(str, arg);  // Make sure the const string is in `str`.
1851     }
1852   }
1853   bool is_compressed =
1854       mirror::kUseStringCompression &&
1855       const_string != nullptr &&
1856       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1857 
1858   if (const_string != nullptr) {
1859     // Load `count` field of the argument string and check if it matches the const string.
1860     // This also compares the compression style; if it differs, return false.
1861     __ Ldr(temp, MemOperand(arg, count_offset));
1862     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1863     __ B(ne, &return_false, /* far_target */ false);
1864   } else {
1865     // Load `count` fields of this and argument strings.
1866     __ Ldr(temp, MemOperand(str, count_offset));
1867     __ Ldr(out, MemOperand(arg, count_offset));
1868     // Check if the `count` fields are equal; return false if they're not.
1869     // This also compares the compression style; if it differs, return false.
1870     __ Cmp(temp, out);
1871     __ B(ne, &return_false, /* far_target */ false);
1872   }
1873 
1874   // Assertions that must hold in order to compare strings 4 bytes at a time.
1875   // Ok to do this because strings are zero-padded to kObjectAlignment.
1876   DCHECK_ALIGNED(value_offset, 4);
1877   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1878 
1879   if (const_string != nullptr &&
1880       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1881                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1882     // Load and compare the contents. Though we know the contents of the short const string
1883     // at compile time, materializing constants may be more code than loading from memory.
1884     int32_t offset = value_offset;
1885     size_t remaining_bytes =
1886         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1887     while (remaining_bytes > sizeof(uint32_t)) {
1888       vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1889       UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1890       vixl32::Register temp2 = scratch_scope.Acquire();
1891       __ Ldrd(temp, temp1, MemOperand(str, offset));
1892       __ Ldrd(temp2, out, MemOperand(arg, offset));
1893       __ Cmp(temp, temp2);
1894       __ B(ne, &return_false, /* far_target */ false);
1895       __ Cmp(temp1, out);
1896       __ B(ne, &return_false, /* far_target */ false);
1897       offset += 2u * sizeof(uint32_t);
1898       remaining_bytes -= 2u * sizeof(uint32_t);
1899     }
1900     if (remaining_bytes != 0u) {
1901       __ Ldr(temp, MemOperand(str, offset));
1902       __ Ldr(out, MemOperand(arg, offset));
1903       __ Cmp(temp, out);
1904       __ B(ne, &return_false, /* far_target */ false);
1905     }
1906   } else {
1907     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1908     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1909                   "Expecting 0=compressed, 1=uncompressed");
1910     __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
1911 
1912     if (mirror::kUseStringCompression) {
1913       // For string compression, calculate the number of bytes to compare (not chars).
1914       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1915       __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1916       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1917                              2 * kMaxInstructionSizeInBytes,
1918                              CodeBufferCheckScope::kMaximumSize);
1919       __ it(cs);                                      // If uncompressed,
1920       __ add(cs, temp, temp, temp);                   //   double the byte count.
1921     }
1922 
1923     vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1924     UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1925     vixl32::Register temp2 = scratch_scope.Acquire();
1926 
1927     // Store offset of string value in preparation for comparison loop.
1928     __ Mov(temp1, value_offset);
1929 
1930     // Loop to compare strings 4 bytes at a time starting at the front of the string.
1931     __ Bind(&loop);
1932     __ Ldr(out, MemOperand(str, temp1));
1933     __ Ldr(temp2, MemOperand(arg, temp1));
1934     __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1935     __ Cmp(out, temp2);
1936     __ B(ne, &return_false, /* far_target */ false);
1937     // With string compression, we have compared 4 bytes, otherwise 2 chars.
1938     __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1939     __ B(hi, &loop, /* far_target */ false);
1940   }
1941 
1942   // Return true and exit the function.
1943   // If loop does not result in returning false, we return true.
1944   __ Bind(&return_true);
1945   __ Mov(out, 1);
1946   __ B(final_label);
1947 
1948   // Return false and exit the function.
1949   __ Bind(&return_false);
1950   __ Mov(out, 0);
1951 
1952   if (end.IsReferenced()) {
1953     __ Bind(&end);
1954   }
1955 }
1956 
1957 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1958                                        ArmVIXLAssembler* assembler,
1959                                        CodeGeneratorARMVIXL* codegen,
1960                                        bool start_at_zero) {
1961   LocationSummary* locations = invoke->GetLocations();
1962 
1963   // Note that the null check must have been done earlier.
1964   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1965 
1966   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1967   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1968   SlowPathCodeARMVIXL* slow_path = nullptr;
1969   HInstruction* code_point = invoke->InputAt(1);
1970   if (code_point->IsIntConstant()) {
1971     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1972         std::numeric_limits<uint16_t>::max()) {
1973       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1974       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1975       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1976       codegen->AddSlowPath(slow_path);
1977       __ B(slow_path->GetEntryLabel());
1978       __ Bind(slow_path->GetExitLabel());
1979       return;
1980     }
1981   } else if (code_point->GetType() != DataType::Type::kUint16) {
1982     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1983     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
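         // (A Thumb-2 modified immediate is an 8-bit value replicated or rotated; 0x10000 is
         // encodable that way, while 0xffff is not.)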
1984     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1985     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1986     codegen->AddSlowPath(slow_path);
1987     __ B(hs, slow_path->GetEntryLabel());
1988   }
1989 
1990   if (start_at_zero) {
1991     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1992     DCHECK(tmp_reg.Is(r2));
1993     // Start-index = 0.
1994     __ Mov(tmp_reg, 0);
1995   }
1996 
1997   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1998   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1999 
2000   if (slow_path != nullptr) {
2001     __ Bind(slow_path->GetExitLabel());
2002   }
2003 }
2004 
2005 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
2006   LocationSummary* locations = new (allocator_) LocationSummary(
2007       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2008   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2009   // best to align the inputs accordingly.
2010   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2011   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2012   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2013   locations->SetOut(LocationFrom(r0));
2014 
2015   // Need to send start-index=0.
2016   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2017 }
2018 
2019 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
2020   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
2021 }
2022 
2023 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
2024   LocationSummary* locations = new (allocator_) LocationSummary(
2025       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2026   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2027   // best to align the inputs accordingly.
2028   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2029   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2030   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2031   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2032   locations->SetOut(LocationFrom(r0));
2033 }
2034 
2035 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
2036   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
2037 }
2038 
2039 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2040   LocationSummary* locations = new (allocator_) LocationSummary(
2041       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2042   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2043   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2044   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2045   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2046   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
2047   locations->SetOut(LocationFrom(r0));
2048 }
2049 
2050 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2051   ArmVIXLAssembler* assembler = GetAssembler();
2052   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
2053   __ Cmp(byte_array, 0);
2054   SlowPathCodeARMVIXL* slow_path =
2055       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2056   codegen_->AddSlowPath(slow_path);
2057   __ B(eq, slow_path->GetEntryLabel());
2058 
2059   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
2060   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
2061   __ Bind(slow_path->GetExitLabel());
2062 }
2063 
2064 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2065   LocationSummary* locations =
2066       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2067   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2068   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2069   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2070   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2071   locations->SetOut(LocationFrom(r0));
2072 }
2073 
2074 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2075   // No need to emit code checking whether `locations->InAt(2)` is a null
2076   // pointer, as callers of the native method
2077   //
2078   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2079   //
2080   // all include a null check on `data` before calling that method.
2081   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2082   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2083 }
2084 
2085 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2086   LocationSummary* locations = new (allocator_) LocationSummary(
2087       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
2088   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2089   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2090   locations->SetOut(LocationFrom(r0));
2091 }
2092 
2093 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2094   ArmVIXLAssembler* assembler = GetAssembler();
2095   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
2096   __ Cmp(string_to_copy, 0);
2097   SlowPathCodeARMVIXL* slow_path =
2098       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2099   codegen_->AddSlowPath(slow_path);
2100   __ B(eq, slow_path->GetEntryLabel());
2101 
2102   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2103   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2104 
2105   __ Bind(slow_path->GetExitLabel());
2106 }
2107 
2108 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2109   // The only read barrier implementation supporting the
2110   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2111   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2112     return;
2113   }
2114 
2115   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2116   LocationSummary* locations = invoke->GetLocations();
2117   if (locations == nullptr) {
2118     return;
2119   }
2120 
2121   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2122   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2123   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2124 
2125   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
2126     locations->SetInAt(1, Location::RequiresRegister());
2127   }
2128   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
2129     locations->SetInAt(3, Location::RequiresRegister());
2130   }
2131   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
2132     locations->SetInAt(4, Location::RequiresRegister());
2133   }
2134   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2135     // Temporary register IP cannot be used in
2136     // ReadBarrierSystemArrayCopySlowPathARM (because that register
2137     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
2138     // temporary register from the register allocator.
2139     locations->AddTemp(Location::RequiresRegister());
2140     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
2141     arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
2142   }
2143 }
2144 
2145 static void CheckPosition(ArmVIXLAssembler* assembler,
2146                           Location pos,
2147                           vixl32::Register input,
2148                           Location length,
2149                           SlowPathCodeARMVIXL* slow_path,
2150                           vixl32::Register temp,
2151                           bool length_is_input_length = false) {
2152   // Where is the length in the Array?
2153   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
2154 
2155   if (pos.IsConstant()) {
2156     int32_t pos_const = Int32ConstantFrom(pos);
2157     if (pos_const == 0) {
2158       if (!length_is_input_length) {
2159         // Check that length(input) >= length.
2160         __ Ldr(temp, MemOperand(input, length_offset));
2161         if (length.IsConstant()) {
2162           __ Cmp(temp, Int32ConstantFrom(length));
2163         } else {
2164           __ Cmp(temp, RegisterFrom(length));
2165         }
2166         __ B(lt, slow_path->GetEntryLabel());
2167       }
2168     } else {
2169       // Check that length(input) >= pos.
2170       __ Ldr(temp, MemOperand(input, length_offset));
2171       __ Subs(temp, temp, pos_const);
2172       __ B(lt, slow_path->GetEntryLabel());
2173 
2174       // Check that (length(input) - pos) >= length.
2175       if (length.IsConstant()) {
2176         __ Cmp(temp, Int32ConstantFrom(length));
2177       } else {
2178         __ Cmp(temp, RegisterFrom(length));
2179       }
2180       __ B(lt, slow_path->GetEntryLabel());
2181     }
2182   } else if (length_is_input_length) {
2183     // The only way the copy can succeed is if pos is zero.
2184     vixl32::Register pos_reg = RegisterFrom(pos);
2185     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
2186   } else {
2187     // Check that pos >= 0.
2188     vixl32::Register pos_reg = RegisterFrom(pos);
2189     __ Cmp(pos_reg, 0);
2190     __ B(lt, slow_path->GetEntryLabel());
2191 
2192     // Check that pos <= length(input).
2193     __ Ldr(temp, MemOperand(input, length_offset));
2194     __ Subs(temp, temp, pos_reg);
2195     __ B(lt, slow_path->GetEntryLabel());
2196 
2197     // Check that (length(input) - pos) >= length.
2198     if (length.IsConstant()) {
2199       __ Cmp(temp, Int32ConstantFrom(length));
2200     } else {
2201       __ Cmp(temp, RegisterFrom(length));
2202     }
2203     __ B(lt, slow_path->GetEntryLabel());
2204   }
2205 }
2206 
2207 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2208   // The only read barrier implementation supporting the
2209   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2210   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2211 
2212   ArmVIXLAssembler* assembler = GetAssembler();
2213   LocationSummary* locations = invoke->GetLocations();
2214 
2215   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2216   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2217   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2218   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2219   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2220 
2221   vixl32::Register src = InputRegisterAt(invoke, 0);
2222   Location src_pos = locations->InAt(1);
2223   vixl32::Register dest = InputRegisterAt(invoke, 2);
2224   Location dest_pos = locations->InAt(3);
2225   Location length = locations->InAt(4);
2226   Location temp1_loc = locations->GetTemp(0);
2227   vixl32::Register temp1 = RegisterFrom(temp1_loc);
2228   Location temp2_loc = locations->GetTemp(1);
2229   vixl32::Register temp2 = RegisterFrom(temp2_loc);
2230   Location temp3_loc = locations->GetTemp(2);
2231   vixl32::Register temp3 = RegisterFrom(temp3_loc);
2232 
2233   SlowPathCodeARMVIXL* intrinsic_slow_path =
2234       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2235   codegen_->AddSlowPath(intrinsic_slow_path);
2236 
2237   vixl32::Label conditions_on_positions_validated;
2238   SystemArrayCopyOptimizations optimizations(invoke);
2239 
2240   // If source and destination are the same, we go to slow path if we need to do
2241   // forward copying.
2242   if (src_pos.IsConstant()) {
2243     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2244     if (dest_pos.IsConstant()) {
2245       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2246       if (optimizations.GetDestinationIsSource()) {
2247         // Checked when building locations.
2248         DCHECK_GE(src_pos_constant, dest_pos_constant);
2249       } else if (src_pos_constant < dest_pos_constant) {
2250         __ Cmp(src, dest);
2251         __ B(eq, intrinsic_slow_path->GetEntryLabel());
2252       }
2253 
2254       // Checked when building locations.
2255       DCHECK(!optimizations.GetDestinationIsSource()
2256              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2257     } else {
2258       if (!optimizations.GetDestinationIsSource()) {
2259         __ Cmp(src, dest);
2260         __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2261       }
2262       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2263       __ B(gt, intrinsic_slow_path->GetEntryLabel());
2264     }
2265   } else {
2266     if (!optimizations.GetDestinationIsSource()) {
2267       __ Cmp(src, dest);
2268       __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
2269     }
2270     if (dest_pos.IsConstant()) {
2271       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2272       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2273     } else {
2274       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2275     }
2276     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2277   }
2278 
2279   __ Bind(&conditions_on_positions_validated);
2280 
2281   if (!optimizations.GetSourceIsNotNull()) {
2282     // Bail out if the source is null.
2283     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
2284   }
2285 
2286   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2287     // Bail out if the destination is null.
2288     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
2289   }
2290 
2291   // If the length is negative, bail out.
2292   // We have already checked in the LocationsBuilder for the constant case.
2293   if (!length.IsConstant() &&
2294       !optimizations.GetCountIsSourceLength() &&
2295       !optimizations.GetCountIsDestinationLength()) {
2296     __ Cmp(RegisterFrom(length), 0);
2297     __ B(lt, intrinsic_slow_path->GetEntryLabel());
2298   }
2299 
2300   // Validity checks: source.
2301   CheckPosition(assembler,
2302                 src_pos,
2303                 src,
2304                 length,
2305                 intrinsic_slow_path,
2306                 temp1,
2307                 optimizations.GetCountIsSourceLength());
2308 
2309   // Validity checks: dest.
2310   CheckPosition(assembler,
2311                 dest_pos,
2312                 dest,
2313                 length,
2314                 intrinsic_slow_path,
2315                 temp1,
2316                 optimizations.GetCountIsDestinationLength());
2317 
2318   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2319     // Check whether all elements of the source array are assignable to the component
2320     // type of the destination array. We do two checks: the classes are the same,
2321     // or the destination is Object[]. If none of these checks succeed, we go to the
2322     // slow path.
2323 
2324     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2325       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2326         // /* HeapReference<Class> */ temp1 = src->klass_
2327         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2328             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2329         // Bail out if the source is not a non primitive array.
2330         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2331         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2332             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2333         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2334         // If heap poisoning is enabled, `temp1` has been unpoisoned
2335         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2336         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2337         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2338         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2339         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2340       }
2341 
2342       // /* HeapReference<Class> */ temp1 = dest->klass_
2343       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2344           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2345 
2346       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2347         // Bail out if the destination is not a non primitive array.
2348         //
2349         // Register `temp1` is not trashed by the read barrier emitted
2350         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2351         // method produces a call to a ReadBarrierMarkRegX entry point,
2352         // which saves all potentially live registers, including
2353         // temporaries such as `temp1`.
2354         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2355         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2356             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
2357         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2358         // If heap poisoning is enabled, `temp2` has been unpoisoned
2359         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2360         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2361         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2362         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2363         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2364       }
2365 
2366       // For the same reason given earlier, `temp1` is not trashed by the
2367       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2368       // /* HeapReference<Class> */ temp2 = src->klass_
2369       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2370           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2371       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2372       __ Cmp(temp1, temp2);
2373 
2374       if (optimizations.GetDestinationIsTypedObjectArray()) {
2375         vixl32::Label do_copy;
2376         __ B(eq, &do_copy, /* far_target */ false);
2377         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2378         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2379             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2380         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2381         // We do not need to emit a read barrier for the following
2382         // heap reference load, as `temp1` is only used in a
2383         // comparison with null below, and this reference is not
2384         // kept afterwards.
2385         __ Ldr(temp1, MemOperand(temp1, super_offset));
2386         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2387         __ Bind(&do_copy);
2388       } else {
2389         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2390       }
2391     } else {
2392       // Non read barrier code.
2393 
2394       // /* HeapReference<Class> */ temp1 = dest->klass_
2395       __ Ldr(temp1, MemOperand(dest, class_offset));
2396       // /* HeapReference<Class> */ temp2 = src->klass_
2397       __ Ldr(temp2, MemOperand(src, class_offset));
2398       bool did_unpoison = false;
2399       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2400           !optimizations.GetSourceIsNonPrimitiveArray()) {
2401         // One or two of the references need to be unpoisoned. Unpoison them
2402         // both to make the identity check valid.
2403         assembler->MaybeUnpoisonHeapReference(temp1);
2404         assembler->MaybeUnpoisonHeapReference(temp2);
2405         did_unpoison = true;
2406       }
2407 
2408       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2409         // Bail out if the destination is not a non primitive array.
2410         // /* HeapReference<Class> */ temp3 = temp1->component_type_
2411         __ Ldr(temp3, MemOperand(temp1, component_offset));
2412         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2413         assembler->MaybeUnpoisonHeapReference(temp3);
2414         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2415         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2416         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2417         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2418       }
2419 
2420       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2421         // Bail out if the source is not a non primitive array.
2422         // /* HeapReference<Class> */ temp3 = temp2->component_type_
2423         __ Ldr(temp3, MemOperand(temp2, component_offset));
2424         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2425         assembler->MaybeUnpoisonHeapReference(temp3);
2426         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2427         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2428         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2429         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2430       }
2431 
2432       __ Cmp(temp1, temp2);
2433 
2434       if (optimizations.GetDestinationIsTypedObjectArray()) {
2435         vixl32::Label do_copy;
2436         __ B(eq, &do_copy, /* far_target */ false);
2437         if (!did_unpoison) {
2438           assembler->MaybeUnpoisonHeapReference(temp1);
2439         }
2440         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2441         __ Ldr(temp1, MemOperand(temp1, component_offset));
2442         assembler->MaybeUnpoisonHeapReference(temp1);
2443         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2444         __ Ldr(temp1, MemOperand(temp1, super_offset));
2445         // No need to unpoison the result, we're comparing against null.
2446         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2447         __ Bind(&do_copy);
2448       } else {
2449         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2450       }
2451     }
2452   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2453     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2454     // Bail out if the source is not a non primitive array.
2455     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2456       // /* HeapReference<Class> */ temp1 = src->klass_
2457       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2458           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2459       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2460       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2461           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2462       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2463       // If heap poisoning is enabled, `temp3` has been unpoisoned
2464       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2465     } else {
2466       // /* HeapReference<Class> */ temp1 = src->klass_
2467       __ Ldr(temp1, MemOperand(src, class_offset));
2468       assembler->MaybeUnpoisonHeapReference(temp1);
2469       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2470       __ Ldr(temp3, MemOperand(temp1, component_offset));
2471       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2472       assembler->MaybeUnpoisonHeapReference(temp3);
2473     }
2474     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2475     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2476     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2477     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2478   }
2479 
2480   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2481     // Constant length of zero: no need to emit the loop code at all.
2482   } else {
2483     vixl32::Label done;
2484     const DataType::Type type = DataType::Type::kReference;
2485     const int32_t element_size = DataType::Size(type);
2486 
2487     if (length.IsRegister()) {
2488       // Don't enter the copy loop if the length is zero.
2489       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
2490     }
2491 
2492     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2493       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2494 
2495       // SystemArrayCopy implementation for Baker read barriers (see
2496       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2497       //
2498       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2499       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2500       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2501       //   if (is_gray) {
2502       //     // Slow-path copy.
2503       //     do {
2504       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2505       //     } while (src_ptr != end_ptr)
2506       //   } else {
2507       //     // Fast-path copy.
2508       //     do {
2509       //       *dest_ptr++ = *src_ptr++;
2510       //     } while (src_ptr != end_ptr)
2511       //   }
2512 
2513       // /* int32_t */ monitor = src->monitor_
2514       __ Ldr(temp2, MemOperand(src, monitor_offset));
2515       // /* LockWord */ lock_word = LockWord(monitor)
2516       static_assert(sizeof(LockWord) == sizeof(int32_t),
2517                     "art::LockWord and int32_t have different sizes.");
2518 
2519       // Introduce a dependency on the lock_word including the rb_state,
2520       // which shall prevent load-load reordering without using
2521       // a memory barrier (which would be more expensive).
2522       // `src` is unchanged by this operation, but its value now depends
2523       // on `temp2`.
2524       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
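           // Note: an LSR by 32 yields 0 on AArch32, so this ADD leaves the value of `src`
           // unchanged while making it register-dependent on the just-loaded lock word in `temp2`.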
2525 
2526       // Compute the base source address in `temp1`.
2527       // Note that `temp1` (the base source address) is computed from
2528       // `src` (and `src_pos`) here, and thus honors the artificial
2529       // dependency of `src` on `temp2`.
2530       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2531       // Compute the end source address in `temp3`.
2532       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2533       // The base destination address is computed later, as `temp2` is
2534       // used for intermediate computations.
2535 
2536       // Slow path used to copy array when `src` is gray.
2537       // Note that the base destination address is computed in `temp2`
2538       // by the slow path code.
2539       SlowPathCodeARMVIXL* read_barrier_slow_path =
2540           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2541       codegen_->AddSlowPath(read_barrier_slow_path);
2542 
2543       // Given the numeric representation, it's enough to check the low bit of the
2544       // rb_state. We do that by shifting the bit out of the lock word with LSRS
2545       // which can be a 16-bit instruction unlike the TST immediate.
2546       static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2547       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2548       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2549       // Carry flag is the last bit shifted out by LSRS.
2550       __ B(cs, read_barrier_slow_path->GetEntryLabel());
2551 
2552       // Fast-path copy.
2553       // Compute the base destination address in `temp2`.
2554       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2555       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2556       // poison/unpoison.
2557       vixl32::Label loop;
2558       __ Bind(&loop);
2559       {
2560         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2561         const vixl32::Register temp_reg = temps.Acquire();
2562         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2563         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2564       }
2565       __ Cmp(temp1, temp3);
2566       __ B(ne, &loop, /* far_target */ false);
2567 
2568       __ Bind(read_barrier_slow_path->GetExitLabel());
2569     } else {
2570       // Non read barrier code.
2571       // Compute the base source address in `temp1`.
2572       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2573       // Compute the base destination address in `temp2`.
2574       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2575       // Compute the end source address in `temp3`.
2576       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2577       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2578       // poison/unpoison.
2579       vixl32::Label loop;
2580       __ Bind(&loop);
2581       {
2582         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2583         const vixl32::Register temp_reg = temps.Acquire();
2584         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2585         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2586       }
2587       __ Cmp(temp1, temp3);
2588       __ B(ne, &loop, /* far_target */ false);
2589     }
2590     __ Bind(&done);
2591   }
2592 
2593   // We only need one card marking on the destination array.
2594   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2595 
2596   __ Bind(intrinsic_slow_path->GetExitLabel());
2597 }
2598 
2599 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2600   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2601   // the code generator. Furthermore, the register allocator creates fixed live intervals
2602   // for all caller-saved registers because we are doing a function call. As a result, if
2603   // the input and output locations are unallocated, the register allocator runs out of
2604   // registers and fails; however, a debuggable graph is not the common case.
2605   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2606     return;
2607   }
2608 
2609   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2610   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2611   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2612 
2613   LocationSummary* const locations =
2614       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2615   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2616 
2617   locations->SetInAt(0, Location::RequiresFpuRegister());
2618   locations->SetOut(Location::RequiresFpuRegister());
2619   // Native code uses the soft float ABI.
2620   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2621   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2622 }
2623 
2624 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2625   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2626   // the code generator. Furthermore, the register allocator creates fixed live intervals
2627   // for all caller-saved registers because we are doing a function call. As a result, if
2628   // the input and output locations are unallocated, the register allocator runs out of
2629   // registers and fails; however, a debuggable graph is not the common case.
2630   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2631     return;
2632   }
2633 
2634   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2635   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2636   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2637   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2638 
2639   LocationSummary* const locations =
2640       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2641   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2642 
2643   locations->SetInAt(0, Location::RequiresFpuRegister());
2644   locations->SetInAt(1, Location::RequiresFpuRegister());
2645   locations->SetOut(Location::RequiresFpuRegister());
2646   // Native code uses the soft float ABI.
2647   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2648   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2649   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2650   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2651 }
2652 
2653 static void GenFPToFPCall(HInvoke* invoke,
2654                           ArmVIXLAssembler* assembler,
2655                           CodeGeneratorARMVIXL* codegen,
2656                           QuickEntrypointEnum entry) {
2657   LocationSummary* const locations = invoke->GetLocations();
2658 
2659   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2660   DCHECK(locations->WillCall() && locations->Intrinsified());
2661 
2662   // Native code uses the soft float ABI.
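       // The double argument is therefore split into the two core registers reserved as temps
       // (the first two runtime calling-convention registers), and the double result is
       // reassembled from the same pair after the runtime call returns.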
2663   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2664           RegisterFrom(locations->GetTemp(1)),
2665           InputDRegisterAt(invoke, 0));
2666   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2667   __ Vmov(OutputDRegister(invoke),
2668           RegisterFrom(locations->GetTemp(0)),
2669           RegisterFrom(locations->GetTemp(1)));
2670 }
2671 
2672 static void GenFPFPToFPCall(HInvoke* invoke,
2673                             ArmVIXLAssembler* assembler,
2674                             CodeGeneratorARMVIXL* codegen,
2675                             QuickEntrypointEnum entry) {
2676   LocationSummary* const locations = invoke->GetLocations();
2677 
2678   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2679   DCHECK(locations->WillCall() && locations->Intrinsified());
2680 
2681   // Native code uses the soft float ABI.
2682   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2683           RegisterFrom(locations->GetTemp(1)),
2684           InputDRegisterAt(invoke, 0));
2685   __ Vmov(RegisterFrom(locations->GetTemp(2)),
2686           RegisterFrom(locations->GetTemp(3)),
2687           InputDRegisterAt(invoke, 1));
2688   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2689   __ Vmov(OutputDRegister(invoke),
2690           RegisterFrom(locations->GetTemp(0)),
2691           RegisterFrom(locations->GetTemp(1)));
2692 }
2693 
2694 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2695   CreateFPToFPCallLocations(allocator_, invoke);
2696 }
2697 
2698 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2699   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2700 }
2701 
2702 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2703   CreateFPToFPCallLocations(allocator_, invoke);
2704 }
2705 
2706 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2707   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2708 }
2709 
2710 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2711   CreateFPToFPCallLocations(allocator_, invoke);
2712 }
2713 
2714 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2715   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2716 }
2717 
2718 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2719   CreateFPToFPCallLocations(allocator_, invoke);
2720 }
2721 
2722 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2723   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2724 }
2725 
2726 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2727   CreateFPToFPCallLocations(allocator_, invoke);
2728 }
2729 
2730 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2731   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2732 }
2733 
2734 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2735   CreateFPToFPCallLocations(allocator_, invoke);
2736 }
2737 
2738 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2739   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2740 }
2741 
2742 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2743   CreateFPToFPCallLocations(allocator_, invoke);
2744 }
2745 
2746 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2747   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2748 }
2749 
2750 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2751   CreateFPToFPCallLocations(allocator_, invoke);
2752 }
2753 
2754 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2755   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2756 }
2757 
2758 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2759   CreateFPToFPCallLocations(allocator_, invoke);
2760 }
2761 
2762 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2763   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2764 }
2765 
2766 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2767   CreateFPToFPCallLocations(allocator_, invoke);
2768 }
2769 
2770 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2771   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2772 }
2773 
2774 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2775   CreateFPToFPCallLocations(allocator_, invoke);
2776 }
2777 
2778 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2779   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2780 }
2781 
2782 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2783   CreateFPToFPCallLocations(allocator_, invoke);
2784 }
2785 
2786 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2787   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2788 }
2789 
2790 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2791   CreateFPToFPCallLocations(allocator_, invoke);
2792 }
2793 
2794 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2795   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2796 }
2797 
2798 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2799   CreateFPToFPCallLocations(allocator_, invoke);
2800 }
2801 
2802 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2803   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2804 }
2805 
2806 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2807   CreateFPFPToFPCallLocations(allocator_, invoke);
2808 }
2809 
2810 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2811   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2812 }
2813 
2814 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
2815   CreateFPFPToFPCallLocations(allocator_, invoke);
2816 }
2817 
2818 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
2819   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
2820 }
2821 
2822 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2823   CreateFPFPToFPCallLocations(allocator_, invoke);
2824 }
2825 
2826 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2827   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2828 }
2829 
2830 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2831   CreateFPFPToFPCallLocations(allocator_, invoke);
2832 }
2833 
2834 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2835   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2836 }
2837 
2838 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2839   CreateIntToIntLocations(allocator_, invoke);
2840 }
2841 
2842 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2843   ArmVIXLAssembler* assembler = GetAssembler();
2844   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2845 }
2846 
2847 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2848   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2849 }
2850 
2851 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2852   ArmVIXLAssembler* assembler = GetAssembler();
2853   LocationSummary* locations = invoke->GetLocations();
2854 
2855   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2856   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2857   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2858   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2859 
2860   __ Rbit(out_reg_lo, in_reg_hi);
2861   __ Rbit(out_reg_hi, in_reg_lo);
2862 }
2863 
2864 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2865   CreateIntToIntLocations(allocator_, invoke);
2866 }
2867 
2868 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2869   ArmVIXLAssembler* assembler = GetAssembler();
2870   __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2871 }
2872 
2873 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2874   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2875 }
2876 
2877 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2878   ArmVIXLAssembler* assembler = GetAssembler();
2879   LocationSummary* locations = invoke->GetLocations();
2880 
2881   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2882   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2883   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2884   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2885 
2886   __ Rev(out_reg_lo, in_reg_hi);
2887   __ Rev(out_reg_hi, in_reg_lo);
2888 }
2889 
2890 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2891   CreateIntToIntLocations(allocator_, invoke);
2892 }
2893 
2894 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2895   ArmVIXLAssembler* assembler = GetAssembler();
2896   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2897 }
2898 
2899 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2900   DCHECK(DataType::IsIntOrLongType(type)) << type;
2901   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2902   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2903 
2904   bool is_long = type == DataType::Type::kInt64;
2905   LocationSummary* locations = instr->GetLocations();
2906   Location in = locations->InAt(0);
2907   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2908   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2909   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2910   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2911   vixl32::Register  out_r = OutputRegister(instr);
2912 
2913   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2914   // According to the Cortex-A57 and Cortex-A72 optimization guides, moving data from a core
2915   // reg to the upper or lower half of a VFP D-reg has extra latency compared to a full D-reg
2916   // move; hence the integer bit count uses 'vmov d0, r0, r0' rather than 'vmov d0[0], r0'.
2917   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2918   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2919   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2920   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2921   if (is_long) {
2922     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2923   }
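       // Only the low S-register of the temp is read back: for the 64-bit case it holds the full
       // count (at most 64), and for the 32-bit case both halves of the D-reg held the same word,
       // so the low half's count is the result.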
2924   __ Vmov(out_r, tmp_s);
2925 }
2926 
2927 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2928   CreateIntToIntLocations(allocator_, invoke);
2929   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2930 }
2931 
2932 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2933   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2934 }
2935 
2936 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2937   VisitIntegerBitCount(invoke);
2938 }
2939 
2940 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2941   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2942 }
2943 
2944 static void GenHighestOneBit(HInvoke* invoke,
2945                              DataType::Type type,
2946                              CodeGeneratorARMVIXL* codegen) {
2947   DCHECK(DataType::IsIntOrLongType(type));
2948 
2949   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2950   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2951   const vixl32::Register temp = temps.Acquire();
2952 
2953   if (type == DataType::Type::kInt64) {
2954     LocationSummary* locations = invoke->GetLocations();
2955     Location in = locations->InAt(0);
2956     Location out = locations->Out();
2957 
2958     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2959     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2960     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2961     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2962 
2963     __ Mov(temp, 0x80000000);  // Modified immediate.
2964     __ Clz(out_reg_lo, in_reg_lo);
2965     __ Clz(out_reg_hi, in_reg_hi);
2966     __ Lsr(out_reg_lo, temp, out_reg_lo);
2967     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2968 
2969     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2970     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2971     // we check that the output is in a low register, so that a 16-bit MOV
2972     // encoding can be used. If output is in a high register, then we generate
2973     // 4 more bytes of code to avoid a branch.
2974     Operand mov_src(0);
2975     if (!out_reg_lo.IsLow()) {
2976       __ Mov(LeaveFlags, temp, 0);
2977       mov_src = Operand(temp);
2978     }
2979     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2980                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2981                                   CodeBufferCheckScope::kExactSize);
2982     __ it(ne);
2983     __ mov(ne, out_reg_lo, mov_src);
2984   } else {
2985     vixl32::Register out = OutputRegister(invoke);
2986     vixl32::Register in = InputRegisterAt(invoke, 0);
2987 
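         // Isolate the highest set bit by shifting 0x80000000 right by the number of leading
         // zeros, e.g. in = 0x00001234 gives CLZ = 19 and 0x80000000 >> 19 = 0x00001000. For
         // in = 0, the register-specified shift by 32 yields 0, matching highestOneBit(0) == 0.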
2988     __ Mov(temp, 0x80000000);  // Modified immediate.
2989     __ Clz(out, in);
2990     __ Lsr(out, temp, out);
2991   }
2992 }
2993 
2994 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2995   CreateIntToIntLocations(allocator_, invoke);
2996 }
2997 
2998 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2999   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
3000 }
3001 
3002 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
3003   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
3004 }
3005 
3006 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
3007   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
3008 }
3009 
3010 static void GenLowestOneBit(HInvoke* invoke,
3011                             DataType::Type type,
3012                             CodeGeneratorARMVIXL* codegen) {
3013   DCHECK(DataType::IsIntOrLongType(type));
3014 
3015   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3016   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3017   const vixl32::Register temp = temps.Acquire();
3018 
3019   if (type == DataType::Type::kInt64) {
3020     LocationSummary* locations = invoke->GetLocations();
3021     Location in = locations->InAt(0);
3022     Location out = locations->Out();
3023 
3024     vixl32::Register in_reg_lo = LowRegisterFrom(in);
3025     vixl32::Register in_reg_hi = HighRegisterFrom(in);
3026     vixl32::Register out_reg_lo = LowRegisterFrom(out);
3027     vixl32::Register out_reg_hi = HighRegisterFrom(out);
3028 
3029     __ Rsb(out_reg_hi, in_reg_hi, 0);
3030     __ Rsb(out_reg_lo, in_reg_lo, 0);
3031     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
3032     // The result of this operation is 0 iff in_reg_lo is 0
3033     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
3034 
3035     // Discard result for highest 32 bits if lowest 32 bits are not zero.
3036     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
3037     // we check that the output is in a low register, so that a 16-bit MOV
3038     // encoding can be used. If output is in a high register, then we generate
3039     // 4 more bytes of code to avoid a branch.
3040     Operand mov_src(0);
3041     if (!out_reg_lo.IsLow()) {
3042       __ Mov(LeaveFlags, temp, 0);
3043       mov_src = Operand(temp);
3044     }
3045     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
3046                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
3047                                   CodeBufferCheckScope::kExactSize);
3048     __ it(ne);
3049     __ mov(ne, out_reg_hi, mov_src);
3050   } else {
3051     vixl32::Register out = OutputRegister(invoke);
3052     vixl32::Register in = InputRegisterAt(invoke, 0);
3053 
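         // Classic lowest-set-bit idiom: in & -in. E.g. in = 0x68 gives -in = 0xFFFFFF98 and
         // in & -in = 0x08, the lowest set bit; for in = 0 the result is 0.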
3054     __ Rsb(temp, in, 0);
3055     __ And(out, temp, in);
3056   }
3057 }
3058 
3059 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3060   CreateIntToIntLocations(allocator_, invoke);
3061 }
3062 
3063 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3064   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
3065 }
3066 
3067 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3068   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
3069 }
3070 
3071 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3072   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
3073 }
3074 
3075 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3076   LocationSummary* locations =
3077       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3078   locations->SetInAt(0, Location::RequiresRegister());
3079   locations->SetInAt(1, Location::RequiresRegister());
3080   locations->SetInAt(2, Location::RequiresRegister());
3081   locations->SetInAt(3, Location::RequiresRegister());
3082   locations->SetInAt(4, Location::RequiresRegister());
3083 
3084   // Temporary registers to store lengths of strings and for calculations.
3085   locations->AddTemp(Location::RequiresRegister());
3086   locations->AddTemp(Location::RequiresRegister());
3087   locations->AddTemp(Location::RequiresRegister());
3088 }
3089 
3090 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3091   ArmVIXLAssembler* assembler = GetAssembler();
3092   LocationSummary* locations = invoke->GetLocations();
3093 
3094   // Check assumption that sizeof(Char) is 2 (used in scaling below).
3095   const size_t char_size = DataType::Size(DataType::Type::kUint16);
3096   DCHECK_EQ(char_size, 2u);
3097 
3098   // Location of data in char array buffer.
3099   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
3100 
3101   // Location of char array data in string.
3102   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
3103 
3104   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
3105   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
3106   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
3107   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
3108   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
3109   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
3110   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
3111 
3112   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
3113   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
3114   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
3115 
3116   vixl32::Label done, compressed_string_loop;
3117   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
3118   // dst to be copied.
3119   __ Add(dst_ptr, dstObj, data_offset);
3120   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
3121 
3122   __ Subs(num_chr, srcEnd, srcBegin);
3123   // Early out for valid zero-length retrievals.
3124   __ B(eq, final_label, /* far_target */ false);
3125 
3126   // src range to copy.
3127   __ Add(src_ptr, srcObj, value_offset);
3128 
3129   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3130   vixl32::Register temp;
3131   vixl32::Label compressed_string_preloop;
3132   if (mirror::kUseStringCompression) {
3133     // Location of count in string.
3134     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
3135     temp = temps.Acquire();
3136     // String's length.
3137     __ Ldr(temp, MemOperand(srcObj, count_offset));
3138     __ Tst(temp, 1);
3139     temps.Release(temp);
3140     __ B(eq, &compressed_string_preloop, /* far_target */ false);
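         // The low bit of the count field is the compression flag: when it is clear the string
         // payload is stored as 8-bit characters and the byte-wise copy loop below is used.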
3141   }
3142   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
3143 
3144   // Do the copy.
3145   vixl32::Label loop, remainder;
3146 
3147   temp = temps.Acquire();
3148   // Subtract into a temp so that num_chr does not need repairing on the < 4 character path.
3149   __ Subs(temp, num_chr, 4);
3150   __ B(lt, &remainder, /* far_target */ false);
3151 
3152   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
3153   __ Mov(num_chr, temp);
3154 
3155   // The main loop, used for longer fetches, loads and stores 4 x 16-bit characters at a time.
3156   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
3157   // to rectify these everywhere this intrinsic applies.)
3158   __ Bind(&loop);
3159   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
3160   __ Subs(num_chr, num_chr, 4);
3161   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
3162   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
3163   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
3164   temps.Release(temp);
3165   __ B(ge, &loop, /* far_target */ false);
3166 
3167   __ Adds(num_chr, num_chr, 4);
3168   __ B(eq, final_label, /* far_target */ false);
3169 
3170   // Main loop for < 4 character case and remainder handling. Loads and stores one
3171   // 16-bit Java character at a time.
3172   __ Bind(&remainder);
3173   temp = temps.Acquire();
3174   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
3175   __ Subs(num_chr, num_chr, 1);
3176   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3177   temps.Release(temp);
3178   __ B(gt, &remainder, /* far_target */ false);
3179 
3180   if (mirror::kUseStringCompression) {
3181     __ B(final_label);
3182 
3183     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3184     DCHECK_EQ(c_char_size, 1u);
3185     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
3186     __ Bind(&compressed_string_preloop);
3187     __ Add(src_ptr, src_ptr, srcBegin);
3188     __ Bind(&compressed_string_loop);
3189     temp = temps.Acquire();
3190     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
3191     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3192     temps.Release(temp);
3193     __ Subs(num_chr, num_chr, 1);
3194     __ B(gt, &compressed_string_loop, /* far_target */ false);
3195   }
3196 
3197   if (done.IsReferenced()) {
3198     __ Bind(&done);
3199   }
3200 }
3201 
3202 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3203   CreateFPToIntLocations(allocator_, invoke);
3204 }
3205 
3206 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3207   ArmVIXLAssembler* const assembler = GetAssembler();
3208   const vixl32::Register out = OutputRegister(invoke);
3209   // Shifting left by 1 bit makes the value encodable as an immediate operand;
3210   // we don't care about the sign bit anyway.
3211   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
3212 
3213   __ Vmov(out, InputSRegisterAt(invoke, 0));
3214   // We don't care about the sign bit, so shift left.
3215   __ Lsl(out, out, 1);
3216   __ Eor(out, out, infinity);
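       // After the shift both 0x7f800000 (+Inf) and 0xff800000 (-Inf) become 0xff000000, so the
       // XOR with `infinity` is zero exactly for the two infinities.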
3217   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3218 }
3219 
3220 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3221   CreateFPToIntLocations(allocator_, invoke);
3222 }
3223 
3224 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3225   ArmVIXLAssembler* const assembler = GetAssembler();
3226   const vixl32::Register out = OutputRegister(invoke);
3227   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3228   const vixl32::Register temp = temps.Acquire();
3229   // The highest 32 bits of double precision positive infinity separated into
3230   // two constants encodable as immediate operands.
3231   constexpr uint32_t infinity_high  = 0x7f000000U;
3232   constexpr uint32_t infinity_high2 = 0x00f00000U;
3233 
3234   static_assert((infinity_high | infinity_high2) ==
3235                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
3236                 "The constants do not add up to the high 32 bits of double "
3237                 "precision positive infinity.");
3238   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
3239   __ Eor(out, out, infinity_high);
3240   __ Eor(out, out, infinity_high2);
3241   // We don't care about the sign bit, so shift left.
3242   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
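       // `out` now holds low_word | ((high_word ^ 0x7ff00000) << 1), which is zero iff the low
       // word is zero and the high word matches +/- infinity up to the sign bit.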
3243   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3244 }
3245 
3246 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
3247   if (features_.HasARMv8AInstructions()) {
3248     CreateFPToFPLocations(allocator_, invoke);
3249   }
3250 }
3251 
3252 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
3253   ArmVIXLAssembler* assembler = GetAssembler();
3254   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
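       // VRINTP rounds toward plus infinity, i.e. ceiling.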
3255   __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3256 }
3257 
3258 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
3259   if (features_.HasARMv8AInstructions()) {
3260     CreateFPToFPLocations(allocator_, invoke);
3261   }
3262 }
3263 
3264 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
3265   ArmVIXLAssembler* assembler = GetAssembler();
3266   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
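       // VRINTM rounds toward minus infinity, i.e. floor.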
3267   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3268 }
3269 
3270 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3271   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3272   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3273       invoke,
3274       codegen_,
3275       LocationFrom(r0),
3276       LocationFrom(calling_convention.GetRegisterAt(0)));
3277 }
3278 
3279 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3280   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3281   LocationSummary* locations = invoke->GetLocations();
3282   ArmVIXLAssembler* const assembler = GetAssembler();
3283 
3284   vixl32::Register out = RegisterFrom(locations->Out());
3285   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3286   vixl32::Register temp = temps.Acquire();
3287   InvokeRuntimeCallingConventionARMVIXL calling_convention;
3288   vixl32::Register argument = calling_convention.GetRegisterAt(0);
3289   if (invoke->InputAt(0)->IsConstant()) {
3290     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3291     if (value >= info.low && value <= info.high) {
3292       // Just embed the j.l.Integer in the code.
3293       ScopedObjectAccess soa(Thread::Current());
3294       mirror::Object* boxed = info.cache->Get(value + (-info.low));
3295       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3296       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3297       __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
3298     } else {
3299       // Allocate and initialize a new j.l.Integer.
3300       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3301       // JIT object table.
3302       uint32_t address =
3303           dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3304       __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3305       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3306       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3307       __ Mov(temp, value);
3308       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3309       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3310       // one.
3311       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3312     }
3313   } else {
3314     vixl32::Register in = RegisterFrom(locations->InAt(0));
3315     // Check bounds of our cache.
3316     __ Add(out, in, -info.low);
3317     __ Cmp(out, info.high - info.low + 1);
3318     vixl32::Label allocate, done;
3319     __ B(hs, &allocate, /* is_far_target */ false);
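         // A single unsigned comparison covers both bounds: inputs below info.low wrap around to
         // large unsigned values after the subtraction and take the `hs` branch as well.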
3320     // If the value is within the bounds, load the j.l.Integer directly from the array.
3321     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3322     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3323     __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3324     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
3325     assembler->MaybeUnpoisonHeapReference(out);
3326     __ B(&done);
3327     __ Bind(&allocate);
3328     // Otherwise allocate and initialize a new j.l.Integer.
3329     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3330     __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3331     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3332     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3333     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3334     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3335     // one.
3336     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3337     __ Bind(&done);
3338   }
3339 }
3340 
3341 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3342   LocationSummary* locations =
3343       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3344   locations->SetOut(Location::RequiresRegister());
3345 }
3346 
3347 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3348   ArmVIXLAssembler* assembler = GetAssembler();
3349   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
3350   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
3351   __ Ldr(out, MemOperand(tr, offset));
3352   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3353   vixl32::Register temp = temps.Acquire();
3354   vixl32::Label done;
3355   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
3356   __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
3357   __ Dmb(vixl32::ISH);
3358   __ Mov(temp, 0);
3359   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3360   __ Dmb(vixl32::ISH);
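       // The flag is only cleared (and the barriers only paid for) when it was observed set; the
       // DMBs order the clearing store with the surrounding memory accesses.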
3361   if (done.IsReferenced()) {
3362     __ Bind(&done);
3363   }
3364 }
3365 
3366 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
3367   LocationSummary* locations =
3368       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3369   locations->SetInAt(0, Location::Any());
3370 }
3371 
3372 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3373 
3374 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
3375 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
3376 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3377 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3378 
3379 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3380 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3381 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3382 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3383 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3384 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
3385 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3386 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3387 
3388 // 1.8.
3389 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3390 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3391 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3392 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3393 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3394 
3395 UNREACHABLE_INTRINSICS(ARMVIXL)
3396 
3397 #undef __
3398 
3399 }  // namespace arm
3400 }  // namespace art
3401