1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_arm_vixl.h"
18 
19 #include "arch/arm/callee_save_frame_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "art_method.h"
22 #include "code_generator_arm_vixl.h"
23 #include "common_arm.h"
24 #include "heap_poisoning.h"
25 #include "intrinsics.h"
26 #include "intrinsics_utils.h"
27 #include "lock_word.h"
28 #include "mirror/array-inl.h"
29 #include "mirror/object_array-inl.h"
30 #include "mirror/reference.h"
31 #include "mirror/string-inl.h"
32 #include "scoped_thread_state_change-inl.h"
33 #include "thread-current-inl.h"
34 
35 #include "aarch32/constants-aarch32.h"
36 
37 namespace art HIDDEN {
38 namespace arm {
39 
40 #define __ assembler->GetVIXLAssembler()->
41 
42 using helpers::DRegisterFrom;
43 using helpers::HighRegisterFrom;
44 using helpers::InputDRegisterAt;
45 using helpers::InputRegisterAt;
46 using helpers::InputSRegisterAt;
47 using helpers::Int32ConstantFrom;
48 using helpers::LocationFrom;
49 using helpers::LowRegisterFrom;
50 using helpers::LowSRegisterFrom;
51 using helpers::HighSRegisterFrom;
52 using helpers::OutputDRegister;
53 using helpers::OutputRegister;
54 using helpers::RegisterFrom;
55 using helpers::SRegisterFrom;
56 
57 using namespace vixl::aarch32;  // NOLINT(build/namespaces)
58 
59 using vixl::ExactAssemblyScope;
60 using vixl::CodeBufferCheckScope;
61 
62 ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
63   return codegen_->GetAssembler();
64 }
65 
66 ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
67   return codegen_->GetGraph()->GetAllocator();
68 }
69 
70 using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
71                                                    SlowPathCodeARMVIXL,
72                                                    ArmVIXLAssembler>;
73 
74 // Compute base address for the System.arraycopy intrinsic in `base`.
75 static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
76                                           DataType::Type type,
77                                           const vixl32::Register& array,
78                                           const Location& pos,
79                                           const vixl32::Register& base) {
80   // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` must be
81   // DataType::Type::kReference. Supporting other types (e.g. DataType::Type::kUint16) would
82   // allow it to also implement the SystemArrayCopyChar intrinsic.
83   DCHECK_EQ(type, DataType::Type::kReference);
84   const int32_t element_size = DataType::Size(type);
85   const uint32_t element_size_shift = DataType::SizeShift(type);
86   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
87 
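  // base = array + data_offset + pos * element_size, where `pos` is either a constant or a register.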
88   if (pos.IsConstant()) {
89     int32_t constant = Int32ConstantFrom(pos);
90     __ Add(base, array, element_size * constant + data_offset);
91   } else {
92     __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
93     __ Add(base, base, data_offset);
94   }
95 }
96 
97 // Compute end address for the System.arraycopy intrinsic in `end`.
98 static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
99                                          DataType::Type type,
100                                          const Location& copy_length,
101                                          const vixl32::Register& base,
102                                          const vixl32::Register& end) {
103   // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` must be
104   // DataType::Type::kReference. Supporting other types (e.g. DataType::Type::kUint16) would
105   // allow it to also implement the SystemArrayCopyChar intrinsic.
106   DCHECK_EQ(type, DataType::Type::kReference);
107   const int32_t element_size = DataType::Size(type);
108   const uint32_t element_size_shift = DataType::SizeShift(type);
109 
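  // end = base + copy_length * element_size, where `copy_length` is either a constant or a register.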
110   if (copy_length.IsConstant()) {
111     int32_t constant = Int32ConstantFrom(copy_length);
112     __ Add(end, base, element_size * constant);
113   } else {
114     __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
115   }
116 }
117 
118 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
119 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
120  public:
121   explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
122       : SlowPathCodeARMVIXL(instruction) {
123     DCHECK(gUseReadBarrier);
124     DCHECK(kUseBakerReadBarrier);
125   }
126 
127   void EmitNativeCode(CodeGenerator* codegen) override {
128     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
129     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
130     LocationSummary* locations = instruction_->GetLocations();
131     DCHECK(locations->CanCall());
132     DCHECK(instruction_->IsInvokeStaticOrDirect())
133         << "Unexpected instruction in read barrier arraycopy slow path: "
134         << instruction_->DebugName();
135     DCHECK(instruction_->GetLocations()->Intrinsified());
136     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
137 
138     DataType::Type type = DataType::Type::kReference;
139     const int32_t element_size = DataType::Size(type);
140 
141     vixl32::Register dest = InputRegisterAt(instruction_, 2);
142     Location dest_pos = locations->InAt(3);
143     vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
144     vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
145     vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
146     vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
147 
148     __ Bind(GetEntryLabel());
149     // Compute the base destination address in `dst_curr_addr`.
150     GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
151 
152     vixl32::Label loop;
153     __ Bind(&loop);
154     __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
155     assembler->MaybeUnpoisonHeapReference(tmp);
156     // TODO: Inline the mark bit check before calling the runtime?
157     // tmp = ReadBarrier::Mark(tmp);
158     // No need to save live registers; it's taken care of by the
159     // entrypoint. Also, there is no need to update the stack mask,
160     // as this runtime call will not trigger a garbage collection.
161     // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
162     // explanations.)
163     DCHECK(!tmp.IsSP());
164     DCHECK(!tmp.IsLR());
165     DCHECK(!tmp.IsPC());
166     // IP is used internally by the ReadBarrierMarkRegX entry point
167     // as a temporary (and not preserved).  It thus cannot be used by
168     // any live register in this slow path.
169     DCHECK(!src_curr_addr.Is(ip));
170     DCHECK(!dst_curr_addr.Is(ip));
171     DCHECK(!src_stop_addr.Is(ip));
172     DCHECK(!tmp.Is(ip));
173     DCHECK(tmp.IsRegister()) << tmp;
174     // TODO: Load the entrypoint once before the loop, instead of
175     // loading it at every iteration.
176     int32_t entry_point_offset =
177         Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
178     // This runtime call does not require a stack map.
179     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
180     assembler->MaybePoisonHeapReference(tmp);
181     __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
182     __ Cmp(src_curr_addr, src_stop_addr);
183     __ B(ne, &loop, /* is_far_target= */ false);
184     __ B(GetExitLabel());
185   }
186 
187   const char* GetDescription() const override {
188     return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
189   }
190 
191  private:
192   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
193 };
194 
195 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
196     : allocator_(codegen->GetGraph()->GetAllocator()),
197       codegen_(codegen),
198       assembler_(codegen->GetAssembler()),
199       features_(codegen->GetInstructionSetFeatures()) {}
200 
201 bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
202   Dispatch(invoke);
203   LocationSummary* res = invoke->GetLocations();
204   if (res == nullptr) {
205     return false;
206   }
207   return res->Intrinsified();
208 }
209 
210 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
211   LocationSummary* locations =
212       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
213   locations->SetInAt(0, Location::RequiresFpuRegister());
214   locations->SetOut(Location::RequiresRegister());
215 }
216 
217 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
218   LocationSummary* locations =
219       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
220   locations->SetInAt(0, Location::RequiresRegister());
221   locations->SetOut(Location::RequiresFpuRegister());
222 }
223 
224 static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
225   Location input = locations->InAt(0);
226   Location output = locations->Out();
227   if (is64bit) {
228     __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
229   } else {
230     __ Vmov(RegisterFrom(output), SRegisterFrom(input));
231   }
232 }
233 
234 static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
235   Location input = locations->InAt(0);
236   Location output = locations->Out();
237   if (is64bit) {
238     __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
239   } else {
240     __ Vmov(SRegisterFrom(output), RegisterFrom(input));
241   }
242 }
243 
244 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
245   CreateFPToIntLocations(allocator_, invoke);
246 }
247 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
248   CreateIntToFPLocations(allocator_, invoke);
249 }
250 
251 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
252   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
253 }
254 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
255   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
256 }
257 
258 void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
259   CreateFPToIntLocations(allocator_, invoke);
260 }
261 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
262   CreateIntToFPLocations(allocator_, invoke);
263 }
264 
265 void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
266   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
267 }
268 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
269   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
270 }
271 
272 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
273   LocationSummary* locations =
274       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
275   locations->SetInAt(0, Location::RequiresRegister());
276   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
277 }
278 
279 static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
280   LocationSummary* locations =
281       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
282   locations->SetInAt(0, Location::RequiresRegister());
283   locations->SetInAt(1, Location::RequiresRegister());
284   // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
285   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
286 }
287 
288 static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
289   LocationSummary* locations =
290       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
291   locations->SetInAt(0, Location::RequiresRegister());
292   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
293 }
294 
295 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296   LocationSummary* locations =
297       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
298   locations->SetInAt(0, Location::RequiresFpuRegister());
299   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
300 }
301 
302 static void GenNumberOfLeadingZeros(HInvoke* invoke,
303                                     DataType::Type type,
304                                     CodeGeneratorARMVIXL* codegen) {
305   ArmVIXLAssembler* assembler = codegen->GetAssembler();
306   LocationSummary* locations = invoke->GetLocations();
307   Location in = locations->InAt(0);
308   vixl32::Register out = RegisterFrom(locations->Out());
309 
310   DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
311 
312   if (type == DataType::Type::kInt64) {
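    // For a 64-bit value: if the high word is non-zero, CLZ(high) is the answer;
    // otherwise the answer is 32 + CLZ(low).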
313     vixl32::Register in_reg_lo = LowRegisterFrom(in);
314     vixl32::Register in_reg_hi = HighRegisterFrom(in);
315     vixl32::Label end;
316     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
317     __ Clz(out, in_reg_hi);
318     __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
319     __ Clz(out, in_reg_lo);
320     __ Add(out, out, 32);
321     if (end.IsReferenced()) {
322       __ Bind(&end);
323     }
324   } else {
325     __ Clz(out, RegisterFrom(in));
326   }
327 }
328 
329 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
330   CreateIntToIntLocations(allocator_, invoke);
331 }
332 
333 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
334   GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
335 }
336 
337 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
338   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
339 }
340 
341 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
342   GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
343 }
344 
345 static void GenNumberOfTrailingZeros(HInvoke* invoke,
346                                      DataType::Type type,
347                                      CodeGeneratorARMVIXL* codegen) {
348   DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));
349 
350   ArmVIXLAssembler* assembler = codegen->GetAssembler();
351   LocationSummary* locations = invoke->GetLocations();
352   vixl32::Register out = RegisterFrom(locations->Out());
353 
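  // AArch32 has no count-trailing-zeros instruction, so CTZ(x) is computed as CLZ(RBIT(x)).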
354   if (type == DataType::Type::kInt64) {
355     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
356     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
357     vixl32::Label end;
358     vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
359     __ Rbit(out, in_reg_lo);
360     __ Clz(out, out);
361     __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
362     __ Rbit(out, in_reg_hi);
363     __ Clz(out, out);
364     __ Add(out, out, 32);
365     if (end.IsReferenced()) {
366       __ Bind(&end);
367     }
368   } else {
369     vixl32::Register in = RegisterFrom(locations->InAt(0));
370     __ Rbit(out, in);
371     __ Clz(out, out);
372   }
373 }
374 
375 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
376   CreateIntToIntLocations(allocator_, invoke);
377 }
378 
379 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
380   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
381 }
382 
383 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
384   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
385 }
386 
387 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
388   GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
389 }
390 
391 void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
392   CreateFPToFPLocations(allocator_, invoke);
393 }
394 
395 void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
396   ArmVIXLAssembler* assembler = GetAssembler();
397   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
398 }
399 
400 void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
401   if (features_.HasARMv8AInstructions()) {
402     CreateFPToFPLocations(allocator_, invoke);
403   }
404 }
405 
406 void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
407   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
408   ArmVIXLAssembler* assembler = GetAssembler();
409   __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
410 }
411 
412 void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
413   if (features_.HasARMv8AInstructions()) {
414     LocationSummary* locations =
415         new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
416     locations->SetInAt(0, Location::RequiresFpuRegister());
417     locations->SetOut(Location::RequiresRegister());
418     locations->AddTemp(Location::RequiresFpuRegister());
419   }
420 }
421 
422 void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
423   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
424 
425   ArmVIXLAssembler* assembler = GetAssembler();
426   vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
427   vixl32::Register out_reg = OutputRegister(invoke);
428   vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
429   vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
430   vixl32::Label done;
431   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
432 
433   // Round to nearest integer, ties away from zero.
434   __ Vcvta(S32, F32, temp1, in_reg);
435   __ Vmov(out_reg, temp1);
436 
437   // For positive, zero or NaN inputs, rounding is done.
438   __ Cmp(out_reg, 0);
439   __ B(ge, final_label, /* is_far_target= */ false);
440 
441   // Handle input < 0 cases.
442   // If input is negative but not a tie, previous result (round to nearest) is valid.
443   // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
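  // A negative tie is detected by checking whether in - Vrinta(in) == 0.5: Vrinta rounds ties away
  // from zero, so only a negative tie leaves a fractional difference of exactly +0.5.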
444   __ Vrinta(F32, temp1, in_reg);
445   __ Vmov(temp2, 0.5);
446   __ Vsub(F32, temp1, in_reg, temp1);
447   __ Vcmp(F32, temp1, temp2);
448   __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
449   {
450     // Use ExactAssemblyScope here because we are using IT.
451     ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
452                                 2 * kMaxInstructionSizeInBytes,
453                                 CodeBufferCheckScope::kMaximumSize);
454     __ it(eq);
455     __ add(eq, out_reg, out_reg, 1);
456   }
457 
458   if (done.IsReferenced()) {
459     __ Bind(&done);
460   }
461 }
462 
463 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
464   CreateIntToIntLocations(allocator_, invoke);
465 }
466 
467 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
468   ArmVIXLAssembler* assembler = GetAssembler();
469   // Ignore upper 4B of long address.
470   __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
471 }
472 
473 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
474   CreateIntToIntLocations(allocator_, invoke);
475 }
476 
477 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
478   ArmVIXLAssembler* assembler = GetAssembler();
479   // Ignore upper 4B of long address.
480   __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
481 }
482 
483 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
484   CreateIntToIntLocations(allocator_, invoke);
485 }
486 
487 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
488   ArmVIXLAssembler* assembler = GetAssembler();
489   // Ignore upper 4B of long address.
490   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
491   // LDRD requires a word-aligned address even when SCTLR.A = 0 (which permits unaligned LDR/STR),
492   // so we can't use ldrd as addr may be unaligned. Use two word loads instead.
493   vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
494   vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
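  // If `addr` aliases `lo`, load `hi` first so the address is not clobbered before the second load.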
495   if (addr.Is(lo)) {
496     __ Ldr(hi, MemOperand(addr, 4));
497     __ Ldr(lo, MemOperand(addr));
498   } else {
499     __ Ldr(lo, MemOperand(addr));
500     __ Ldr(hi, MemOperand(addr, 4));
501   }
502 }
503 
504 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
505   CreateIntToIntLocations(allocator_, invoke);
506 }
507 
508 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
509   ArmVIXLAssembler* assembler = GetAssembler();
510   // Ignore upper 4B of long address.
511   __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
512 }
513 
514 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
515   LocationSummary* locations =
516       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
517   locations->SetInAt(0, Location::RequiresRegister());
518   locations->SetInAt(1, Location::RequiresRegister());
519 }
520 
521 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
522   CreateIntIntToVoidLocations(allocator_, invoke);
523 }
524 
525 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
526   ArmVIXLAssembler* assembler = GetAssembler();
527   __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
528 }
529 
530 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
531   CreateIntIntToVoidLocations(allocator_, invoke);
532 }
533 
534 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
535   ArmVIXLAssembler* assembler = GetAssembler();
536   __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
537 }
538 
539 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
540   CreateIntIntToVoidLocations(allocator_, invoke);
541 }
542 
543 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
544   ArmVIXLAssembler* assembler = GetAssembler();
545   // Ignore upper 4B of long address.
546   vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
547   // STRD requires a word-aligned address even when SCTLR.A = 0 (which permits unaligned LDR/STR),
548   // so we can't use strd as addr may be unaligned. Use two word stores instead.
549   __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
550   __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
551 }
552 
553 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
554   CreateIntIntToVoidLocations(allocator_, invoke);
555 }
556 
557 void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
558   ArmVIXLAssembler* assembler = GetAssembler();
559   __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
560 }
561 
562 void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
563   LocationSummary* locations =
564       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
565   locations->SetOut(Location::RequiresRegister());
566 }
567 
568 void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
569   ArmVIXLAssembler* assembler = GetAssembler();
570   __ Ldr(OutputRegister(invoke),
571          MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
572 }
573 
574 void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
575   // The inputs plus temporaries.
576   LocationSummary* locations =
577       new (allocator_) LocationSummary(invoke,
578                                        invoke->InputAt(1)->CanBeNull()
579                                            ? LocationSummary::kCallOnSlowPath
580                                            : LocationSummary::kNoCall,
581                                        kIntrinsified);
582   locations->SetInAt(0, Location::RequiresRegister());
583   locations->SetInAt(1, Location::RequiresRegister());
584   locations->AddTemp(Location::RequiresRegister());
585   locations->AddTemp(Location::RequiresRegister());
586   locations->AddTemp(Location::RequiresRegister());
587   // Need an extra temporary register for the String compression feature.
588   if (mirror::kUseStringCompression) {
589     locations->AddTemp(Location::RequiresRegister());
590   }
591   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
592 }
593 
594 // Forward declaration.
595 //
596 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
597 // by the compiler for every C++ function, and if this function gets inlined in
598 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
599 // build failure. That is the reason why NO_INLINE attribute is used.
600 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
601                                                   HInvoke* invoke,
602                                                   vixl32::Label* end,
603                                                   vixl32::Label* different_compression);
604 
605 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
606   ArmVIXLAssembler* assembler = GetAssembler();
607   LocationSummary* locations = invoke->GetLocations();
608 
609   const vixl32::Register str = InputRegisterAt(invoke, 0);
610   const vixl32::Register arg = InputRegisterAt(invoke, 1);
611   const vixl32::Register out = OutputRegister(invoke);
612 
613   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
614   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
615   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
616   vixl32::Register temp3;
617   if (mirror::kUseStringCompression) {
618     temp3 = RegisterFrom(locations->GetTemp(3));
619   }
620 
621   vixl32::Label end;
622   vixl32::Label different_compression;
623 
624   // Get offsets of count and value fields within a string object.
625   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
626 
627   // Note that the null check must have been done earlier.
628   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
629 
630   // Take the slow path and throw if the argument can be, and actually is, null.
631   SlowPathCodeARMVIXL* slow_path = nullptr;
632   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
633   if (can_slow_path) {
634     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
635     codegen_->AddSlowPath(slow_path);
636     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
637   }
638 
639   // Reference equality check, return 0 if same reference.
640   __ Subs(out, str, arg);
641   __ B(eq, &end);
642 
643   if (mirror::kUseStringCompression) {
644     // Load `count` fields of this and argument strings.
645     __ Ldr(temp3, MemOperand(str, count_offset));
646     __ Ldr(temp2, MemOperand(arg, count_offset));
647     // Extract lengths from the `count` fields.
648     __ Lsr(temp0, temp3, 1u);
649     __ Lsr(temp1, temp2, 1u);
650   } else {
651     // Load lengths of this and argument strings.
652     __ Ldr(temp0, MemOperand(str, count_offset));
653     __ Ldr(temp1, MemOperand(arg, count_offset));
654   }
655   // out = length diff.
656   __ Subs(out, temp0, temp1);
657   // temp0 = min(len(str), len(arg)).
658 
659   {
660     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
661                            2 * kMaxInstructionSizeInBytes,
662                            CodeBufferCheckScope::kMaximumSize);
663 
664     __ it(gt);
665     __ mov(gt, temp0, temp1);
666   }
667 
668   // Shorter string is empty?
669   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
670   // which pushes the &end label too far from this branch to be 'CBZ-encodable'.
671   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
672 
673   if (mirror::kUseStringCompression) {
674     // Check that both strings use the same compression style before using this comparison loop.
675     __ Eors(temp2, temp2, temp3);
676     __ Lsrs(temp2, temp2, 1u);
677     __ B(cs, &different_compression);
678     // For string compression, calculate the number of bytes to compare (not chars).
679     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
680     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
681 
682     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
683                            2 * kMaxInstructionSizeInBytes,
684                            CodeBufferCheckScope::kMaximumSize);
685 
686     __ it(ne);
687     __ add(ne, temp0, temp0, temp0);
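    // If the strings are uncompressed (compression flag set), double temp0 so it counts bytes
    // (two per char); for compressed strings it already counts bytes.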
688   }
689 
690 
691   GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
692 
693   __ Bind(&end);
694 
695   if (can_slow_path) {
696     __ Bind(slow_path->GetExitLabel());
697   }
698 }
699 
700 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
701                                         HInvoke* invoke,
702                                         vixl32::Label* end,
703                                         vixl32::Label* different_compression) {
704   LocationSummary* locations = invoke->GetLocations();
705 
706   const vixl32::Register str = InputRegisterAt(invoke, 0);
707   const vixl32::Register arg = InputRegisterAt(invoke, 1);
708   const vixl32::Register out = OutputRegister(invoke);
709 
710   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
711   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
712   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
713   vixl32::Register temp3;
714   if (mirror::kUseStringCompression) {
715     temp3 = RegisterFrom(locations->GetTemp(3));
716   }
717 
718   vixl32::Label loop;
719   vixl32::Label find_char_diff;
720 
721   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
722   // Store offset of string value in preparation for comparison loop.
723   __ Mov(temp1, value_offset);
724 
725   // Assertions that must hold in order to compare multiple characters at a time.
726   CHECK_ALIGNED(value_offset, 8);
727   static_assert(IsAligned<8>(kObjectAlignment),
728                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
729 
730   const unsigned char_size = DataType::Size(DataType::Type::kUint16);
731   DCHECK_EQ(char_size, 2u);
732 
733   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
734 
735   vixl32::Label find_char_diff_2nd_cmp;
736   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
737   __ Bind(&loop);
738   vixl32::Register temp_reg = temps.Acquire();
739   __ Ldr(temp_reg, MemOperand(str, temp1));
740   __ Ldr(temp2, MemOperand(arg, temp1));
741   __ Cmp(temp_reg, temp2);
742   __ B(ne, &find_char_diff, /* is_far_target= */ false);
743   __ Add(temp1, temp1, char_size * 2);
744 
745   __ Ldr(temp_reg, MemOperand(str, temp1));
746   __ Ldr(temp2, MemOperand(arg, temp1));
747   __ Cmp(temp_reg, temp2);
748   __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
749   __ Add(temp1, temp1, char_size * 2);
750   // With string compression, we have compared 8 bytes, otherwise 4 chars.
751   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
752   __ B(hi, &loop, /* is_far_target= */ false);
753   __ B(end);
754 
755   __ Bind(&find_char_diff_2nd_cmp);
756   if (mirror::kUseStringCompression) {
757     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
758     __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
759   } else {
760     // Without string compression, we can start treating temp0 as signed
761     // and rely on the signed comparison below.
762     __ Sub(temp0, temp0, 2);
763   }
764 
765   // Find the single character difference.
766   __ Bind(&find_char_diff);
767   // Get the bit position of the first character that differs.
768   __ Eor(temp1, temp2, temp_reg);
769   __ Rbit(temp1, temp1);
770   __ Clz(temp1, temp1);
771 
772   // temp0 = number of characters remaining to compare.
773   // (Without string compression, it could be < 1 if a difference is found by the second CMP
774   // in the comparison loop, and after the end of the shorter string data).
775 
776   // Without string compression (temp1 >> 4) = character where difference occurs between the last
777   // two words compared, in the interval [0,1].
778   // (0 for low half-word different, 1 for high half-word different).
779   // With string compression, (temp1 << 3) = byte where the difference occurs,
780   // in the interval [0,3].
781 
782   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
783   // the remaining string data, so just return length diff (out).
784   // The comparison is unsigned for string compression, otherwise signed.
785   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
786   __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
787 
788   // Extract the characters and calculate the difference.
789   if (mirror::kUseStringCompression) {
790     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
791     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
792     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
793     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
794     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
795     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
796     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
797     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
798     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
799     __ And(temp2, temp2, temp3);
800     __ And(out, out, temp3);
801   } else {
802     __ Bic(temp1, temp1, 0xf);
803     __ Lsr(temp2, temp2, temp1);
804     __ Lsr(out, temp_reg, temp1);
805     __ Movt(temp2, 0);
806     __ Movt(out, 0);
807   }
808 
809   __ Sub(out, out, temp2);
810   temps.Release(temp_reg);
811 
812   if (mirror::kUseStringCompression) {
813     __ B(end);
814     __ Bind(different_compression);
815 
816     // Comparison for different compression style.
817     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
818     DCHECK_EQ(c_char_size, 1u);
819 
820     // We want to free up temp3, currently holding `str.count`, for comparison.
821     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
822     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
823     // further down by a LSRS+SBC which will flip the meaning of the flag but allow
824     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
825     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
826     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
827     __ Mov(temp1, str);
828     __ Mov(temp2, arg);
829     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
830     {
831       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
832                              3 * kMaxInstructionSizeInBytes,
833                              CodeBufferCheckScope::kMaximumSize);
834       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
835       __ mov(cs, temp1, arg);                 // Preserves flags.
836       __ mov(cs, temp2, str);                 // Preserves flags.
837     }
838     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
839 
840     // Adjust temp1 and temp2 from string pointers to data pointers.
841     __ Add(temp1, temp1, value_offset);
842     __ Add(temp2, temp2, value_offset);
843 
844     vixl32::Label different_compression_loop;
845     vixl32::Label different_compression_diff;
846 
847     // Main loop for different compression.
848     temp_reg = temps.Acquire();
849     __ Bind(&different_compression_loop);
850     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
851     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
852     __ Cmp(temp_reg, temp3);
853     __ B(ne, &different_compression_diff, /* is_far_target= */ false);
854     __ Subs(temp0, temp0, 2);
855     __ B(hi, &different_compression_loop, /* is_far_target= */ false);
856     __ B(end);
857 
858     // Calculate the difference.
859     __ Bind(&different_compression_diff);
860     __ Sub(out, temp_reg, temp3);
861     temps.Release(temp_reg);
862     // Flip the difference if the `arg` is compressed.
863     // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
864     __ Lsrs(temp0, temp0, 1u);
865     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
866                   "Expecting 0=compressed, 1=uncompressed");
867 
868     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
869                            2 * kMaxInstructionSizeInBytes,
870                            CodeBufferCheckScope::kMaximumSize);
871     __ it(cc);
872     __ rsb(cc, out, out, 0);
873   }
874 }
875 
876 // The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
877 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
878 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
879 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
880 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
881 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
882 
883 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
884   if (candidate->IsLoadString()) {
885     HLoadString* load_string = candidate->AsLoadString();
886     const DexFile& dex_file = load_string->GetDexFile();
887     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
888   }
889   return nullptr;
890 }
891 
892 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
893   LocationSummary* locations =
894       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
895   InvokeRuntimeCallingConventionARMVIXL calling_convention;
896   locations->SetInAt(0, Location::RequiresRegister());
897   locations->SetInAt(1, Location::RequiresRegister());
898 
899   // Temporary registers to store lengths of strings and for calculations.
900   // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
901   locations->AddTemp(LocationFrom(r0));
902 
903   // For the generic implementation and for long const strings we need an extra temporary.
904   // We do not need it for short const strings, up to 4 bytes, see code generation below.
905   uint32_t const_string_length = 0u;
906   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
907   if (const_string == nullptr) {
908     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
909   }
910   bool is_compressed =
911       mirror::kUseStringCompression &&
912       const_string != nullptr &&
913       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
914   if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
915     locations->AddTemp(Location::RequiresRegister());
916   }
917 
918   // TODO: If the String.equals() is used only for an immediately following HIf, we can
919   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
920   // Then we shall need an extra temporary register instead of the output register.
921   locations->SetOut(Location::RequiresRegister());
922 }
923 
924 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
925   ArmVIXLAssembler* assembler = GetAssembler();
926   LocationSummary* locations = invoke->GetLocations();
927 
928   vixl32::Register str = InputRegisterAt(invoke, 0);
929   vixl32::Register arg = InputRegisterAt(invoke, 1);
930   vixl32::Register out = OutputRegister(invoke);
931 
932   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
933 
934   vixl32::Label loop;
935   vixl32::Label end;
936   vixl32::Label return_true;
937   vixl32::Label return_false;
938   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
939 
940   // Get offsets of count, value, and class fields within a string object.
941   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
942   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
943   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
944 
945   // Note that the null check must have been done earlier.
946   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
947 
948   StringEqualsOptimizations optimizations(invoke);
949   if (!optimizations.GetArgumentNotNull()) {
950     // Check if input is null, return false if it is.
951     __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
952   }
953 
954   // Reference equality check, return true if same reference.
955   __ Cmp(str, arg);
956   __ B(eq, &return_true, /* is_far_target= */ false);
957 
958   if (!optimizations.GetArgumentIsString()) {
959     // Instanceof check for the argument by comparing class fields.
960     // All string objects must have the same type since String cannot be subclassed.
961     // Receiver must be a string object, so its class field is equal to all strings' class fields.
962     // If the argument is a string object, its class field must be equal to receiver's class field.
963     //
964     // As the String class is expected to be non-movable, we can read the class
965     // field from String.equals' arguments without read barriers.
966     AssertNonMovableStringClass();
967     // /* HeapReference<Class> */ temp = str->klass_
968     __ Ldr(temp, MemOperand(str, class_offset));
969     // /* HeapReference<Class> */ out = arg->klass_
970     __ Ldr(out, MemOperand(arg, class_offset));
971     // Also, because we use the previously loaded class references only in the
972     // following comparison, we don't need to unpoison them.
973     __ Cmp(temp, out);
974     __ B(ne, &return_false, /* is_far_target= */ false);
975   }
976 
977   // Check if one of the inputs is a const string. Do not special-case both strings
978   // being const; such cases should be handled by constant folding if needed.
979   uint32_t const_string_length = 0u;
980   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
981   if (const_string == nullptr) {
982     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
983     if (const_string != nullptr) {
984       std::swap(str, arg);  // Make sure the const string is in `str`.
985     }
986   }
987   bool is_compressed =
988       mirror::kUseStringCompression &&
989       const_string != nullptr &&
990       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
991 
992   if (const_string != nullptr) {
993     // Load `count` field of the argument string and check if it matches the const string.
994     // This also compares the compression style; if it differs, return false.
995     __ Ldr(temp, MemOperand(arg, count_offset));
996     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
997     __ B(ne, &return_false, /* is_far_target= */ false);
998   } else {
999     // Load `count` fields of this and argument strings.
1000     __ Ldr(temp, MemOperand(str, count_offset));
1001     __ Ldr(out, MemOperand(arg, count_offset));
1002     // Check if `count` fields are equal, return false if they're not.
1003     // This also compares the compression style; if it differs, return false.
1004     __ Cmp(temp, out);
1005     __ B(ne, &return_false, /* is_far_target= */ false);
1006   }
1007 
1008   // Assertions that must hold in order to compare strings 4 bytes at a time.
1009   // Ok to do this because strings are zero-padded to kObjectAlignment.
1010   DCHECK_ALIGNED(value_offset, 4);
1011   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1012 
1013   if (const_string != nullptr &&
1014       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1015                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1016     // Load and compare the contents. Though we know the contents of the short const string
1017     // at compile time, materializing constants may be more code than loading from memory.
1018     int32_t offset = value_offset;
1019     size_t remaining_bytes =
1020         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
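    // Compare 8 bytes per iteration using LDRD pairs; any remaining 4 bytes are compared after the loop.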
1021     while (remaining_bytes > sizeof(uint32_t)) {
1022       vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1023       UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1024       vixl32::Register temp2 = scratch_scope.Acquire();
1025       __ Ldrd(temp, temp1, MemOperand(str, offset));
1026       __ Ldrd(temp2, out, MemOperand(arg, offset));
1027       __ Cmp(temp, temp2);
1028       __ B(ne, &return_false, /* is_far_target= */ false);
1029       __ Cmp(temp1, out);
1030       __ B(ne, &return_false, /* is_far_target= */ false);
1031       offset += 2u * sizeof(uint32_t);
1032       remaining_bytes -= 2u * sizeof(uint32_t);
1033     }
1034     if (remaining_bytes != 0u) {
1035       __ Ldr(temp, MemOperand(str, offset));
1036       __ Ldr(out, MemOperand(arg, offset));
1037       __ Cmp(temp, out);
1038       __ B(ne, &return_false, /* is_far_target= */ false);
1039     }
1040   } else {
1041     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1042     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1043                   "Expecting 0=compressed, 1=uncompressed");
1044     __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1045 
1046     if (mirror::kUseStringCompression) {
1047       // For string compression, calculate the number of bytes to compare (not chars).
1048       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1049       __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1050       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1051                              2 * kMaxInstructionSizeInBytes,
1052                              CodeBufferCheckScope::kMaximumSize);
1053       __ it(cs);                                      // If uncompressed,
1054       __ add(cs, temp, temp, temp);                   //   double the byte count.
1055     }
1056 
1057     vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1058     UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1059     vixl32::Register temp2 = scratch_scope.Acquire();
1060 
1061     // Store offset of string value in preparation for comparison loop.
1062     __ Mov(temp1, value_offset);
1063 
1064     // Loop to compare strings 4 bytes at a time starting at the front of the string.
1065     __ Bind(&loop);
1066     __ Ldr(out, MemOperand(str, temp1));
1067     __ Ldr(temp2, MemOperand(arg, temp1));
1068     __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1069     __ Cmp(out, temp2);
1070     __ B(ne, &return_false, /* is_far_target= */ false);
1071     // With string compression, we have compared 4 bytes, otherwise 2 chars.
1072     __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1073     __ B(hi, &loop, /* is_far_target= */ false);
1074   }
1075 
1076   // Return true and exit the function.
1077   // If loop does not result in returning false, we return true.
1078   __ Bind(&return_true);
1079   __ Mov(out, 1);
1080   __ B(final_label);
1081 
1082   // Return false and exit the function.
1083   __ Bind(&return_false);
1084   __ Mov(out, 0);
1085 
1086   if (end.IsReferenced()) {
1087     __ Bind(&end);
1088   }
1089 }
1090 
1091 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1092                                        ArmVIXLAssembler* assembler,
1093                                        CodeGeneratorARMVIXL* codegen,
1094                                        bool start_at_zero) {
1095   LocationSummary* locations = invoke->GetLocations();
1096 
1097   // Note that the null check must have been done earlier.
1098   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1099 
1100   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1101   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1102   SlowPathCodeARMVIXL* slow_path = nullptr;
1103   HInstruction* code_point = invoke->InputAt(1);
1104   if (code_point->IsIntConstant()) {
1105     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1106         std::numeric_limits<uint16_t>::max()) {
1107       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1108       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1109       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1110       codegen->AddSlowPath(slow_path);
1111       __ B(slow_path->GetEntryLabel());
1112       __ Bind(slow_path->GetExitLabel());
1113       return;
1114     }
1115   } else if (code_point->GetType() != DataType::Type::kUint16) {
1116     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1117     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1118     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1119     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1120     codegen->AddSlowPath(slow_path);
1121     __ B(hs, slow_path->GetEntryLabel());
1122   }
1123 
1124   if (start_at_zero) {
1125     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1126     DCHECK(tmp_reg.Is(r2));
1127     // Start-index = 0.
1128     __ Mov(tmp_reg, 0);
1129   }
1130 
1131   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1132   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1133 
1134   if (slow_path != nullptr) {
1135     __ Bind(slow_path->GetExitLabel());
1136   }
1137 }
1138 
1139 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1140   LocationSummary* locations = new (allocator_) LocationSummary(
1141       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1142   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1143   // best to align the inputs accordingly.
1144   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1145   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1146   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1147   locations->SetOut(LocationFrom(r0));
1148 
1149   // Need to send start-index=0.
1150   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1151 }
1152 
1153 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1154   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1155 }
1156 
1157 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1158   LocationSummary* locations = new (allocator_) LocationSummary(
1159       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1160   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1161   // best to align the inputs accordingly.
1162   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1163   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1164   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1165   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1166   locations->SetOut(LocationFrom(r0));
1167 }
1168 
1169 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1170   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1171 }
1172 
1173 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1174   LocationSummary* locations = new (allocator_) LocationSummary(
1175       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1176   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1177   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1178   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1179   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1180   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1181   locations->SetOut(LocationFrom(r0));
1182 }
1183 
1184 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1185   ArmVIXLAssembler* assembler = GetAssembler();
1186   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
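  // Delegate a null byte array to the intrinsic slow path below.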
1187   __ Cmp(byte_array, 0);
1188   SlowPathCodeARMVIXL* slow_path =
1189       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1190   codegen_->AddSlowPath(slow_path);
1191   __ B(eq, slow_path->GetEntryLabel());
1192 
1193   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1194   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1195   __ Bind(slow_path->GetExitLabel());
1196 }
1197 
1198 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1199   LocationSummary* locations =
1200       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1201   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1202   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1203   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1204   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1205   locations->SetOut(LocationFrom(r0));
1206 }
1207 
1208 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1209   // No need to emit code checking whether `locations->InAt(2)` is a null
1210   // pointer, as callers of the native method
1211   //
1212   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1213   //
1214   // all include a null check on `data` before calling that method.
1215   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1216   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1217 }
1218 
1219 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1220   LocationSummary* locations = new (allocator_) LocationSummary(
1221       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1222   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1223   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1224   locations->SetOut(LocationFrom(r0));
1225 }
1226 
1227 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1228   ArmVIXLAssembler* assembler = GetAssembler();
1229   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1230   __ Cmp(string_to_copy, 0);
1231   SlowPathCodeARMVIXL* slow_path =
1232       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1233   codegen_->AddSlowPath(slow_path);
1234   __ B(eq, slow_path->GetEntryLabel());
1235 
1236   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1237   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1238 
1239   __ Bind(slow_path->GetExitLabel());
1240 }
1241 
1242 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1243   // The only read barrier implementation supporting the
1244   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1245   if (gUseReadBarrier && !kUseBakerReadBarrier) {
1246     return;
1247   }
1248 
1249   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1250   LocationSummary* locations = invoke->GetLocations();
1251   if (locations == nullptr) {
1252     return;
1253   }
1254 
1255   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1256   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1257   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1258 
1259   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1260     locations->SetInAt(1, Location::RequiresRegister());
1261   }
1262   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1263     locations->SetInAt(3, Location::RequiresRegister());
1264   }
1265   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1266     locations->SetInAt(4, Location::RequiresRegister());
1267   }
1268   if (gUseReadBarrier && kUseBakerReadBarrier) {
1269     // Temporary register IP cannot be used in
1270     // ReadBarrierSystemArrayCopySlowPathARM (because that register
1271     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1272     // temporary register from the register allocator.
1273     locations->AddTemp(Location::RequiresRegister());
1274   }
1275 }
1276 
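// Emits checks that `pos` and `length` describe a valid sub-range of `input`,
// i.e. 0 <= pos and pos + length <= length(input), branching to `slow_path`
// otherwise. When `length_is_input_length` is true, the caller asserts that
// `length` is the full length of `input`, so only pos == 0 can pass.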
1277 static void CheckPosition(ArmVIXLAssembler* assembler,
1278                           Location pos,
1279                           vixl32::Register input,
1280                           Location length,
1281                           SlowPathCodeARMVIXL* slow_path,
1282                           vixl32::Register temp,
1283                           bool length_is_input_length = false) {
1284   // Where is the length in the Array?
1285   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1286 
1287   if (pos.IsConstant()) {
1288     int32_t pos_const = Int32ConstantFrom(pos);
1289     if (pos_const == 0) {
1290       if (!length_is_input_length) {
1291         // Check that length(input) >= length.
1292         __ Ldr(temp, MemOperand(input, length_offset));
1293         if (length.IsConstant()) {
1294           __ Cmp(temp, Int32ConstantFrom(length));
1295         } else {
1296           __ Cmp(temp, RegisterFrom(length));
1297         }
1298         __ B(lt, slow_path->GetEntryLabel());
1299       }
1300     } else {
1301       // Check that length(input) >= pos.
1302       __ Ldr(temp, MemOperand(input, length_offset));
1303       __ Subs(temp, temp, pos_const);
1304       __ B(lt, slow_path->GetEntryLabel());
1305 
1306       // Check that (length(input) - pos) >= length.
1307       if (length.IsConstant()) {
1308         __ Cmp(temp, Int32ConstantFrom(length));
1309       } else {
1310         __ Cmp(temp, RegisterFrom(length));
1311       }
1312       __ B(lt, slow_path->GetEntryLabel());
1313     }
1314   } else if (length_is_input_length) {
1315     // The only way the copy can succeed is if pos is zero.
1316     vixl32::Register pos_reg = RegisterFrom(pos);
1317     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1318   } else {
1319     // Check that pos >= 0.
1320     vixl32::Register pos_reg = RegisterFrom(pos);
1321     __ Cmp(pos_reg, 0);
1322     __ B(lt, slow_path->GetEntryLabel());
1323 
1324     // Check that pos <= length(input).
1325     __ Ldr(temp, MemOperand(input, length_offset));
1326     __ Subs(temp, temp, pos_reg);
1327     __ B(lt, slow_path->GetEntryLabel());
1328 
1329     // Check that (length(input) - pos) >= length.
1330     if (length.IsConstant()) {
1331       __ Cmp(temp, Int32ConstantFrom(length));
1332     } else {
1333       __ Cmp(temp, RegisterFrom(length));
1334     }
1335     __ B(lt, slow_path->GetEntryLabel());
1336   }
1337 }
1338 
1339 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1340   // The only read barrier implementation supporting the
1341   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1342   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
1343 
1344   ArmVIXLAssembler* assembler = GetAssembler();
1345   LocationSummary* locations = invoke->GetLocations();
1346 
1347   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1348   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1349   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1350   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1351   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1352 
1353   vixl32::Register src = InputRegisterAt(invoke, 0);
1354   Location src_pos = locations->InAt(1);
1355   vixl32::Register dest = InputRegisterAt(invoke, 2);
1356   Location dest_pos = locations->InAt(3);
1357   Location length = locations->InAt(4);
1358   Location temp1_loc = locations->GetTemp(0);
1359   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1360   Location temp2_loc = locations->GetTemp(1);
1361   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1362   Location temp3_loc = locations->GetTemp(2);
1363   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1364 
1365   SlowPathCodeARMVIXL* intrinsic_slow_path =
1366       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1367   codegen_->AddSlowPath(intrinsic_slow_path);
1368 
1369   vixl32::Label conditions_on_positions_validated;
1370   SystemArrayCopyOptimizations optimizations(invoke);
1371 
1372   // If source and destination are (or may be) the same array, go to the slow path when
1373   // src_pos < dest_pos: a forward copy would overwrite source elements before they are read.
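  // (For example, copying [a, b, c, d] from position 0 to position 1 within the same
  // array with a front-to-back copy would yield [a, a, a, a].)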
1374   if (src_pos.IsConstant()) {
1375     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1376     if (dest_pos.IsConstant()) {
1377       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1378       if (optimizations.GetDestinationIsSource()) {
1379         // Checked when building locations.
1380         DCHECK_GE(src_pos_constant, dest_pos_constant);
1381       } else if (src_pos_constant < dest_pos_constant) {
1382         __ Cmp(src, dest);
1383         __ B(eq, intrinsic_slow_path->GetEntryLabel());
1384       }
1385 
1386       // Checked when building locations.
1387       DCHECK(!optimizations.GetDestinationIsSource()
1388              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1389     } else {
1390       if (!optimizations.GetDestinationIsSource()) {
1391         __ Cmp(src, dest);
1392         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1393       }
1394       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1395       __ B(gt, intrinsic_slow_path->GetEntryLabel());
1396     }
1397   } else {
1398     if (!optimizations.GetDestinationIsSource()) {
1399       __ Cmp(src, dest);
1400       __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1401     }
1402     if (dest_pos.IsConstant()) {
1403       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1404       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1405     } else {
1406       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1407     }
1408     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1409   }
1410 
1411   __ Bind(&conditions_on_positions_validated);
1412 
1413   if (!optimizations.GetSourceIsNotNull()) {
1414     // Bail out if the source is null.
1415     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1416   }
1417 
1418   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1419     // Bail out if the destination is null.
1420     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1421   }
1422 
1423   // If the length is negative, bail out.
1424   // We have already checked in the LocationsBuilder for the constant case.
1425   if (!length.IsConstant() &&
1426       !optimizations.GetCountIsSourceLength() &&
1427       !optimizations.GetCountIsDestinationLength()) {
1428     __ Cmp(RegisterFrom(length), 0);
1429     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1430   }
1431 
1432   // Validity checks: source.
1433   CheckPosition(assembler,
1434                 src_pos,
1435                 src,
1436                 length,
1437                 intrinsic_slow_path,
1438                 temp1,
1439                 optimizations.GetCountIsSourceLength());
1440 
1441   // Validity checks: dest.
1442   CheckPosition(assembler,
1443                 dest_pos,
1444                 dest,
1445                 length,
1446                 intrinsic_slow_path,
1447                 temp1,
1448                 optimizations.GetCountIsDestinationLength());
1449 
1450   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1451     // Check whether all elements of the source array are assignable to the component
1452     // type of the destination array. We do two checks: the classes are the same,
1453     // or the destination is Object[]. If none of these checks succeed, we go to the
1454     // slow path.
1455 
1456     if (gUseReadBarrier && kUseBakerReadBarrier) {
1457       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1458         // /* HeapReference<Class> */ temp1 = src->klass_
1459         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1460             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1461         // Bail out if the source is not a non primitive array.
1462         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1463         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1464             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1465         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1466         // If heap poisoning is enabled, `temp1` has been unpoisoned
1467         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1468         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1469         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1470         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1471         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1472       }
1473 
1474       // /* HeapReference<Class> */ temp1 = dest->klass_
1475       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1476           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1477 
1478       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1479         // Bail out if the destination is not a non primitive array.
1480         //
1481         // Register `temp1` is not trashed by the read barrier emitted
1482         // by GenerateFieldLoadWithBakerReadBarrier below, as that
1483         // method produces a call to a ReadBarrierMarkRegX entry point,
1484         // which saves all potentially live registers, including
1485         // temporaries such as `temp1`.
1486         // /* HeapReference<Class> */ temp2 = temp1->component_type_
1487         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1488             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
1489         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
1490         // If heap poisoning is enabled, `temp2` has been unpoisoned
1491         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1492         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1493         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1494         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1495         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
1496       }
1497 
1498       // For the same reason given earlier, `temp1` is not trashed by the
1499       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1500       // /* HeapReference<Class> */ temp2 = src->klass_
1501       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1502           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
1503       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1504       __ Cmp(temp1, temp2);
1505 
1506       if (optimizations.GetDestinationIsTypedObjectArray()) {
1507         vixl32::Label do_copy;
1508         __ B(eq, &do_copy, /* is_far_target= */ false);
1509         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1510         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1511             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1512         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1513         // We do not need to emit a read barrier for the following
1514         // heap reference load, as `temp1` is only used in a
1515         // comparison with null below, and this reference is not
1516         // kept afterwards.
1517         __ Ldr(temp1, MemOperand(temp1, super_offset));
1518         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1519         __ Bind(&do_copy);
1520       } else {
1521         __ B(ne, intrinsic_slow_path->GetEntryLabel());
1522       }
1523     } else {
1524       // Non read barrier code.
1525 
1526       // /* HeapReference<Class> */ temp1 = dest->klass_
1527       __ Ldr(temp1, MemOperand(dest, class_offset));
1528       // /* HeapReference<Class> */ temp2 = src->klass_
1529       __ Ldr(temp2, MemOperand(src, class_offset));
1530       bool did_unpoison = false;
1531       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1532           !optimizations.GetSourceIsNonPrimitiveArray()) {
1533         // One or two of the references need to be unpoisoned. Unpoison them
1534         // both to make the identity check valid.
1535         assembler->MaybeUnpoisonHeapReference(temp1);
1536         assembler->MaybeUnpoisonHeapReference(temp2);
1537         did_unpoison = true;
1538       }
1539 
1540       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1541         // Bail out if the destination is not a non primitive array.
1542         // /* HeapReference<Class> */ temp3 = temp1->component_type_
1543         __ Ldr(temp3, MemOperand(temp1, component_offset));
1544         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1545         assembler->MaybeUnpoisonHeapReference(temp3);
1546         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1547         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1548         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1549         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1550       }
1551 
1552       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1553         // Bail out if the source is not a non primitive array.
1554         // /* HeapReference<Class> */ temp3 = temp2->component_type_
1555         __ Ldr(temp3, MemOperand(temp2, component_offset));
1556         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1557         assembler->MaybeUnpoisonHeapReference(temp3);
1558         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1559         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1560         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1561         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1562       }
1563 
1564       __ Cmp(temp1, temp2);
1565 
1566       if (optimizations.GetDestinationIsTypedObjectArray()) {
1567         vixl32::Label do_copy;
1568         __ B(eq, &do_copy, /* is_far_target= */ false);
1569         if (!did_unpoison) {
1570           assembler->MaybeUnpoisonHeapReference(temp1);
1571         }
1572         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1573         __ Ldr(temp1, MemOperand(temp1, component_offset));
1574         assembler->MaybeUnpoisonHeapReference(temp1);
1575         // /* HeapReference<Class> */ temp1 = temp1->super_class_
1576         __ Ldr(temp1, MemOperand(temp1, super_offset));
1577         // No need to unpoison the result, we're comparing against null.
1578         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1579         __ Bind(&do_copy);
1580       } else {
1581         __ B(ne, intrinsic_slow_path->GetEntryLabel());
1582       }
1583     }
1584   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1585     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1586     // Bail out if the source is not a non primitive array.
1587     if (gUseReadBarrier && kUseBakerReadBarrier) {
1588       // /* HeapReference<Class> */ temp1 = src->klass_
1589       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1590           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1591       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1592       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1593           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1594       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1595       // If heap poisoning is enabled, `temp3` has been unpoisoned
1596       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1597     } else {
1598       // /* HeapReference<Class> */ temp1 = src->klass_
1599       __ Ldr(temp1, MemOperand(src, class_offset));
1600       assembler->MaybeUnpoisonHeapReference(temp1);
1601       // /* HeapReference<Class> */ temp3 = temp1->component_type_
1602       __ Ldr(temp3, MemOperand(temp1, component_offset));
1603       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
1604       assembler->MaybeUnpoisonHeapReference(temp3);
1605     }
1606     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1607     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1608     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1609     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
1610   }
1611 
1612   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
1613     // Constant zero length: no need to emit the loop code at all.
1614   } else {
1615     vixl32::Label done;
1616     const DataType::Type type = DataType::Type::kReference;
1617     const int32_t element_size = DataType::Size(type);
1618 
1619     if (length.IsRegister()) {
1620       // Don't enter the copy loop if the length is zero.
1621       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
1622     }
1623 
1624     if (gUseReadBarrier && kUseBakerReadBarrier) {
1625       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
1626 
1627       // SystemArrayCopy implementation for Baker read barriers (see
1628       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
1629       //
1630       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1631       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1632       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
1633       //   if (is_gray) {
1634       //     // Slow-path copy.
1635       //     do {
1636       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1637       //     } while (src_ptr != end_ptr)
1638       //   } else {
1639       //     // Fast-path copy.
1640       //     do {
1641       //       *dest_ptr++ = *src_ptr++;
1642       //     } while (src_ptr != end_ptr)
1643       //   }
1644 
1645       // /* int32_t */ monitor = src->monitor_
1646       __ Ldr(temp2, MemOperand(src, monitor_offset));
1647       // /* LockWord */ lock_word = LockWord(monitor)
1648       static_assert(sizeof(LockWord) == sizeof(int32_t),
1649                     "art::LockWord and int32_t have different sizes.");
1650 
1651       // Introduce a dependency on the lock_word including the rb_state,
1652       // which shall prevent load-load reordering without using
1653       // a memory barrier (which would be more expensive).
1654       // `src` is unchanged by this operation, but its value now depends
1655       // on `temp2`.
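      // (A logical shift right by 32 produces zero, so the ADD below does not change
      // the value of `src`.)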
1656       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
1657 
1658       // Compute the base source address in `temp1`.
1659       // Note that `temp1` (the base source address) is computed from
1660       // `src` (and `src_pos`) here, and thus honors the artificial
1661       // dependency of `src` on `temp2`.
1662       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1663       // Compute the end source address in `temp3`.
1664       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1665       // The base destination address is computed later, as `temp2` is
1666       // used for intermediate computations.
1667 
1668       // Slow path used to copy array when `src` is gray.
1669       // Note that the base destination address is computed in `temp2`
1670       // by the slow path code.
1671       SlowPathCodeARMVIXL* read_barrier_slow_path =
1672           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
1673       codegen_->AddSlowPath(read_barrier_slow_path);
1674 
1675       // Given the numeric representation, it's enough to check the low bit of the
1676       // rb_state. We do that by shifting the bit out of the lock word with LSRS
1677       // which can be a 16-bit instruction unlike the TST immediate.
1678       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1679       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1680       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
1681       // Carry flag is the last bit shifted out by LSRS.
1682       __ B(cs, read_barrier_slow_path->GetEntryLabel());
1683 
1684       // Fast-path copy.
1685       // Compute the base destination address in `temp2`.
1686       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1687       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1688       // poison/unpoison.
1689       vixl32::Label loop;
1690       __ Bind(&loop);
1691       {
1692         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1693         const vixl32::Register temp_reg = temps.Acquire();
1694         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1695         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1696       }
1697       __ Cmp(temp1, temp3);
1698       __ B(ne, &loop, /* is_far_target= */ false);
1699 
1700       __ Bind(read_barrier_slow_path->GetExitLabel());
1701     } else {
1702       // Non read barrier code.
1703       // Compute the base source address in `temp1`.
1704       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
1705       // Compute the base destination address in `temp2`.
1706       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
1707       // Compute the end source address in `temp3`.
1708       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
1709       // Iterate over the arrays and do a raw copy of the objects. We don't need to
1710       // poison/unpoison.
1711       vixl32::Label loop;
1712       __ Bind(&loop);
1713       {
1714         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1715         const vixl32::Register temp_reg = temps.Acquire();
1716         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
1717         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
1718       }
1719       __ Cmp(temp1, temp3);
1720       __ B(ne, &loop, /* is_far_target= */ false);
1721     }
1722     __ Bind(&done);
1723   }
1724 
1725   // We only need one card marking on the destination array.
1726   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* emit_null_check= */ false);
1727 
1728   __ Bind(intrinsic_slow_path->GetExitLabel());
1729 }
1730 
1731 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1732   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1733   // the code generator. Furthermore, the register allocator creates fixed live intervals
1734   // for all caller-saved registers because we are doing a function call. As a result, if
1735   // the input and output locations are unallocated, the register allocator runs out of
1736   // registers and fails; however, a debuggable graph is not the common case.
1737   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1738     return;
1739   }
1740 
1741   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1742   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1743   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1744 
1745   LocationSummary* const locations =
1746       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1747   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1748 
1749   locations->SetInAt(0, Location::RequiresFpuRegister());
1750   locations->SetOut(Location::RequiresFpuRegister());
1751   // Native code uses the soft float ABI.
1752   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1753   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1754 }
1755 
1756 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1757   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
1758   // the code generator. Furthermore, the register allocator creates fixed live intervals
1759   // for all caller-saved registers because we are doing a function call. As a result, if
1760   // the input and output locations are unallocated, the register allocator runs out of
1761   // registers and fails; however, a debuggable graph is not the common case.
1762   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
1763     return;
1764   }
1765 
1766   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1767   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
1768   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
1769   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
1770 
1771   LocationSummary* const locations =
1772       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1773   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
1774 
1775   locations->SetInAt(0, Location::RequiresFpuRegister());
1776   locations->SetInAt(1, Location::RequiresFpuRegister());
1777   locations->SetOut(Location::RequiresFpuRegister());
1778   // Native code uses the soft float ABI.
1779   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
1780   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
1781   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1782   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
1783 }
1784 
1785 static void GenFPToFPCall(HInvoke* invoke,
1786                           ArmVIXLAssembler* assembler,
1787                           CodeGeneratorARMVIXL* codegen,
1788                           QuickEntrypointEnum entry) {
1789   LocationSummary* const locations = invoke->GetLocations();
1790 
1791   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1792   DCHECK(locations->WillCall() && locations->Intrinsified());
1793 
1794   // Native code uses the soft float ABI.
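  // The first Vmov moves the double argument into the core register pair reserved from
  // the runtime calling convention; the second Vmov moves the core-register result back
  // into the output D register after the call.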
1795   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1796           RegisterFrom(locations->GetTemp(1)),
1797           InputDRegisterAt(invoke, 0));
1798   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1799   __ Vmov(OutputDRegister(invoke),
1800           RegisterFrom(locations->GetTemp(0)),
1801           RegisterFrom(locations->GetTemp(1)));
1802 }
1803 
1804 static void GenFPFPToFPCall(HInvoke* invoke,
1805                             ArmVIXLAssembler* assembler,
1806                             CodeGeneratorARMVIXL* codegen,
1807                             QuickEntrypointEnum entry) {
1808   LocationSummary* const locations = invoke->GetLocations();
1809 
1810   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1811   DCHECK(locations->WillCall() && locations->Intrinsified());
1812 
1813   // Native code uses the soft float ABI.
1814   __ Vmov(RegisterFrom(locations->GetTemp(0)),
1815           RegisterFrom(locations->GetTemp(1)),
1816           InputDRegisterAt(invoke, 0));
1817   __ Vmov(RegisterFrom(locations->GetTemp(2)),
1818           RegisterFrom(locations->GetTemp(3)),
1819           InputDRegisterAt(invoke, 1));
1820   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1821   __ Vmov(OutputDRegister(invoke),
1822           RegisterFrom(locations->GetTemp(0)),
1823           RegisterFrom(locations->GetTemp(1)));
1824 }
1825 
1826 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
1827   CreateFPToFPCallLocations(allocator_, invoke);
1828 }
1829 
1830 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
1831   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
1832 }
1833 
1834 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
1835   CreateFPToFPCallLocations(allocator_, invoke);
1836 }
1837 
1838 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
1839   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
1840 }
1841 
1842 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
1843   CreateFPToFPCallLocations(allocator_, invoke);
1844 }
1845 
1846 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
1847   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
1848 }
1849 
1850 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
1851   CreateFPToFPCallLocations(allocator_, invoke);
1852 }
1853 
1854 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
1855   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
1856 }
1857 
1858 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
1859   CreateFPToFPCallLocations(allocator_, invoke);
1860 }
1861 
1862 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
1863   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
1864 }
1865 
1866 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1867   CreateFPToFPCallLocations(allocator_, invoke);
1868 }
1869 
1870 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
1871   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
1872 }
1873 
1874 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
1875   CreateFPToFPCallLocations(allocator_, invoke);
1876 }
1877 
1878 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
1879   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
1880 }
1881 
1882 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
1883   CreateFPToFPCallLocations(allocator_, invoke);
1884 }
1885 
1886 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
1887   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
1888 }
1889 
1890 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1891   CreateFPToFPCallLocations(allocator_, invoke);
1892 }
1893 
1894 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
1895   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
1896 }
1897 
1898 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
1899   CreateFPToFPCallLocations(allocator_, invoke);
1900 }
1901 
1902 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
1903   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
1904 }
1905 
1906 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
1907   CreateFPToFPCallLocations(allocator_, invoke);
1908 }
1909 
1910 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
1911   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
1912 }
1913 
1914 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
1915   CreateFPToFPCallLocations(allocator_, invoke);
1916 }
1917 
1918 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
1919   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
1920 }
1921 
1922 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
1923   CreateFPToFPCallLocations(allocator_, invoke);
1924 }
1925 
1926 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
1927   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
1928 }
1929 
1930 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
1931   CreateFPToFPCallLocations(allocator_, invoke);
1932 }
1933 
1934 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
1935   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
1936 }
1937 
1938 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1939   CreateFPFPToFPCallLocations(allocator_, invoke);
1940 }
1941 
1942 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
1943   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
1944 }
1945 
1946 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
1947   CreateFPFPToFPCallLocations(allocator_, invoke);
1948 }
1949 
1950 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
1951   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
1952 }
1953 
1954 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
1955   CreateFPFPToFPCallLocations(allocator_, invoke);
1956 }
1957 
1958 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
1959   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
1960 }
1961 
1962 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1963   CreateFPFPToFPCallLocations(allocator_, invoke);
1964 }
1965 
1966 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
1967   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
1968 }
1969 
1970 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1971   CreateIntToIntLocations(allocator_, invoke);
1972 }
1973 
1974 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
1975   ArmVIXLAssembler* assembler = GetAssembler();
1976   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
1977 }
1978 
1979 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
1980   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
1981 }
1982 
1983 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
1984   ArmVIXLAssembler* assembler = GetAssembler();
1985   LocationSummary* locations = invoke->GetLocations();
1986 
1987   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
1988   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
1989   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
1990   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
1991 
1992   __ Rbit(out_reg_lo, in_reg_hi);
1993   __ Rbit(out_reg_hi, in_reg_lo);
1994 }
1995 
1996 static void GenerateReverseBytesInPlaceForEachWord(ArmVIXLAssembler* assembler, Location pair) {
1997   DCHECK(pair.IsRegisterPair());
1998   __ Rev(LowRegisterFrom(pair), LowRegisterFrom(pair));
1999   __ Rev(HighRegisterFrom(pair), HighRegisterFrom(pair));
2000 }
2001 
2002 static void GenerateReverseBytes(ArmVIXLAssembler* assembler,
2003                                  DataType::Type type,
2004                                  Location in,
2005                                  Location out) {
2006   switch (type) {
2007     case DataType::Type::kUint16:
2008       __ Rev16(RegisterFrom(out), RegisterFrom(in));
2009       break;
2010     case DataType::Type::kInt16:
2011       __ Revsh(RegisterFrom(out), RegisterFrom(in));
2012       break;
2013     case DataType::Type::kInt32:
2014       __ Rev(RegisterFrom(out), RegisterFrom(in));
2015       break;
2016     case DataType::Type::kInt64:
2017       DCHECK(!LowRegisterFrom(out).Is(LowRegisterFrom(in)));
2018       __ Rev(LowRegisterFrom(out), HighRegisterFrom(in));
2019       __ Rev(HighRegisterFrom(out), LowRegisterFrom(in));
2020       break;
2021     case DataType::Type::kFloat32:
2022       __ Rev(RegisterFrom(in), RegisterFrom(in));  // Note: Clobbers `in`.
2023       __ Vmov(SRegisterFrom(out), RegisterFrom(in));
2024       break;
2025     case DataType::Type::kFloat64:
2026       GenerateReverseBytesInPlaceForEachWord(assembler, in);  // Note: Clobbers `in`.
2027       __ Vmov(DRegisterFrom(out), HighRegisterFrom(in), LowRegisterFrom(in));  // Swap high/low.
2028       break;
2029     default:
2030       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
2031       UNREACHABLE();
2032   }
2033 }
2034 
2035 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2036   CreateIntToIntLocations(allocator_, invoke);
2037 }
2038 
2039 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2040   ArmVIXLAssembler* assembler = GetAssembler();
2041   LocationSummary* locations = invoke->GetLocations();
2042   GenerateReverseBytes(assembler, DataType::Type::kInt32, locations->InAt(0), locations->Out());
2043 }
2044 
2045 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2046   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2047 }
2048 
2049 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2050   ArmVIXLAssembler* assembler = GetAssembler();
2051   LocationSummary* locations = invoke->GetLocations();
2052   GenerateReverseBytes(assembler, DataType::Type::kInt64, locations->InAt(0), locations->Out());
2053 }
2054 
2055 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2056   CreateIntToIntLocations(allocator_, invoke);
2057 }
2058 
2059 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2060   ArmVIXLAssembler* assembler = GetAssembler();
2061   LocationSummary* locations = invoke->GetLocations();
2062   GenerateReverseBytes(assembler, DataType::Type::kInt16, locations->InAt(0), locations->Out());
2063 }
2064 
2065 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2066   DCHECK(DataType::IsIntOrLongType(type)) << type;
2067   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2068   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2069 
2070   bool is_long = type == DataType::Type::kInt64;
2071   LocationSummary* locations = instr->GetLocations();
2072   Location in = locations->InAt(0);
2073   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2074   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2075   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2076   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2077   vixl32::Register  out_r = OutputRegister(instr);
2078 
2079   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2080   // According to the Cortex-A57 and Cortex-A72 optimization guides, transferring data from a
2081   // core register to the upper or lower half of a VFP D-reg has extra latency compared to
2082   // transferring to the full D-reg. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
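  // Vcnt computes a per-byte population count; the Vpaddl steps then pairwise-add
  // adjacent lanes until a single 32-bit (or, for longs, 64-bit) total remains.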
2083   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2084   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2085   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2086   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2087   if (is_long) {
2088     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2089   }
2090   __ Vmov(out_r, tmp_s);
2091 }
2092 
2093 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2094   CreateIntToIntLocations(allocator_, invoke);
2095   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2096 }
2097 
2098 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2099   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2100 }
2101 
2102 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2103   VisitIntegerBitCount(invoke);
2104 }
2105 
2106 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2107   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2108 }
2109 
2110 static void GenHighestOneBit(HInvoke* invoke,
2111                              DataType::Type type,
2112                              CodeGeneratorARMVIXL* codegen) {
2113   DCHECK(DataType::IsIntOrLongType(type));
2114 
2115   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2116   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2117   const vixl32::Register temp = temps.Acquire();
2118 
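  // highestOneBit(x) is computed as 0x80000000 >> clz(x); e.g. clz(0x00000ff0) is 20 and
  // 0x80000000 >> 20 is 0x00000800. For x == 0, clz(x) is 32 and the shift yields 0, as required.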
2119   if (type == DataType::Type::kInt64) {
2120     LocationSummary* locations = invoke->GetLocations();
2121     Location in = locations->InAt(0);
2122     Location out = locations->Out();
2123 
2124     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2125     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2126     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2127     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2128 
2129     __ Mov(temp, 0x80000000);  // Modified immediate.
2130     __ Clz(out_reg_lo, in_reg_lo);
2131     __ Clz(out_reg_hi, in_reg_hi);
2132     __ Lsr(out_reg_lo, temp, out_reg_lo);
2133     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2134 
2135     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2136     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2137     // we check that the output is in a low register, so that a 16-bit MOV
2138     // encoding can be used. If output is in a high register, then we generate
2139     // 4 more bytes of code to avoid a branch.
2140     Operand mov_src(0);
2141     if (!out_reg_lo.IsLow()) {
2142       __ Mov(LeaveFlags, temp, 0);
2143       mov_src = Operand(temp);
2144     }
2145     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2146                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2147                                   CodeBufferCheckScope::kExactSize);
2148     __ it(ne);
2149     __ mov(ne, out_reg_lo, mov_src);
2150   } else {
2151     vixl32::Register out = OutputRegister(invoke);
2152     vixl32::Register in = InputRegisterAt(invoke, 0);
2153 
2154     __ Mov(temp, 0x80000000);  // Modified immediate.
2155     __ Clz(out, in);
2156     __ Lsr(out, temp, out);
2157   }
2158 }
2159 
2160 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2161   CreateIntToIntLocations(allocator_, invoke);
2162 }
2163 
2164 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2165   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2166 }
2167 
2168 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2169   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2170 }
2171 
2172 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2173   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2174 }
2175 
2176 static void GenLowestOneBit(HInvoke* invoke,
2177                             DataType::Type type,
2178                             CodeGeneratorARMVIXL* codegen) {
2179   DCHECK(DataType::IsIntOrLongType(type));
2180 
2181   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2182   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2183   const vixl32::Register temp = temps.Acquire();
2184 
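  // lowestOneBit(x) uses the two's-complement identity x & -x, which isolates the
  // least significant set bit (and maps 0 to 0).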
2185   if (type == DataType::Type::kInt64) {
2186     LocationSummary* locations = invoke->GetLocations();
2187     Location in = locations->InAt(0);
2188     Location out = locations->Out();
2189 
2190     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2191     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2192     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2193     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2194 
2195     __ Rsb(out_reg_hi, in_reg_hi, 0);
2196     __ Rsb(out_reg_lo, in_reg_lo, 0);
2197     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2198     // The result of this operation is 0 iff in_reg_lo is 0
2199     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2200 
2201     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2202     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2203     // we check that the output is in a low register, so that a 16-bit MOV
2204     // encoding can be used. If output is in a high register, then we generate
2205     // 4 more bytes of code to avoid a branch.
2206     Operand mov_src(0);
2207     if (!out_reg_lo.IsLow()) {
2208       __ Mov(LeaveFlags, temp, 0);
2209       mov_src = Operand(temp);
2210     }
2211     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2212                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2213                                   CodeBufferCheckScope::kExactSize);
2214     __ it(ne);
2215     __ mov(ne, out_reg_hi, mov_src);
2216   } else {
2217     vixl32::Register out = OutputRegister(invoke);
2218     vixl32::Register in = InputRegisterAt(invoke, 0);
2219 
2220     __ Rsb(temp, in, 0);
2221     __ And(out, temp, in);
2222   }
2223 }
2224 
2225 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2226   CreateIntToIntLocations(allocator_, invoke);
2227 }
2228 
2229 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2230   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2231 }
2232 
2233 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2234   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2235 }
2236 
2237 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2238   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2239 }
2240 
2241 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2242   LocationSummary* locations =
2243       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2244   locations->SetInAt(0, Location::RequiresRegister());
2245   locations->SetInAt(1, Location::RequiresRegister());
2246   locations->SetInAt(2, Location::RequiresRegister());
2247   locations->SetInAt(3, Location::RequiresRegister());
2248   locations->SetInAt(4, Location::RequiresRegister());
2249 
2250   // Temporary registers to store lengths of strings and for calculations.
2251   locations->AddTemp(Location::RequiresRegister());
2252   locations->AddTemp(Location::RequiresRegister());
2253   locations->AddTemp(Location::RequiresRegister());
2254 }
2255 
2256 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2257   ArmVIXLAssembler* assembler = GetAssembler();
2258   LocationSummary* locations = invoke->GetLocations();
2259 
2260   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2261   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2262   DCHECK_EQ(char_size, 2u);
2263 
2264   // Location of data in char array buffer.
2265   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2266 
2267   // Location of char array data in string.
2268   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2269 
2270   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2271   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2272   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2273   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2274   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2275   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2276   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2277 
2278   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2279   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2280   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2281 
2282   vixl32::Label done, compressed_string_loop;
2283   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2284   // dst to be copied.
2285   __ Add(dst_ptr, dstObj, data_offset);
2286   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2287 
2288   __ Subs(num_chr, srcEnd, srcBegin);
2289   // Early out for valid zero-length retrievals.
2290   __ B(eq, final_label, /* is_far_target= */ false);
2291 
2292   // src range to copy.
2293   __ Add(src_ptr, srcObj, value_offset);
2294 
2295   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2296   vixl32::Register temp;
2297   vixl32::Label compressed_string_preloop;
2298   if (mirror::kUseStringCompression) {
2299     // Location of count in string.
2300     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2301     temp = temps.Acquire();
2302     // String's length.
2303     __ Ldr(temp, MemOperand(srcObj, count_offset));
2304     __ Tst(temp, 1);
2305     temps.Release(temp);
2306     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2307   }
2308   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2309 
2310   // Do the copy.
2311   vixl32::Label loop, remainder;
2312 
2313   temp = temps.Acquire();
2314   // Save repairing the value of num_chr on the < 4 character path.
2315   __ Subs(temp, num_chr, 4);
2316   __ B(lt, &remainder, /* is_far_target= */ false);
2317 
2318   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2319   __ Mov(num_chr, temp);
2320 
2321   // The main loop, used for longer fetches, loads and stores 4x16-bit characters at a time.
2322   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2323   // to rectify these everywhere this intrinsic applies.)
2324   __ Bind(&loop);
2325   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2326   __ Subs(num_chr, num_chr, 4);
2327   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2328   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2329   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2330   temps.Release(temp);
2331   __ B(ge, &loop, /* is_far_target= */ false);
2332 
2333   __ Adds(num_chr, num_chr, 4);
2334   __ B(eq, final_label, /* is_far_target= */ false);
2335 
2336   // Main loop for < 4 character case and remainder handling. Loads and stores one
2337   // 16-bit Java character at a time.
2338   __ Bind(&remainder);
2339   temp = temps.Acquire();
2340   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2341   __ Subs(num_chr, num_chr, 1);
2342   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2343   temps.Release(temp);
2344   __ B(gt, &remainder, /* is_far_target= */ false);
2345 
2346   if (mirror::kUseStringCompression) {
2347     __ B(final_label);
2348 
2349     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2350     DCHECK_EQ(c_char_size, 1u);
2351     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2352     __ Bind(&compressed_string_preloop);
2353     __ Add(src_ptr, src_ptr, srcBegin);
2354     __ Bind(&compressed_string_loop);
2355     temp = temps.Acquire();
2356     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2357     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2358     temps.Release(temp);
2359     __ Subs(num_chr, num_chr, 1);
2360     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2361   }
2362 
2363   if (done.IsReferenced()) {
2364     __ Bind(&done);
2365   }
2366 }
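// --- Editor's sketch (not part of the original file). ---
// A C++ model of the copy strategy emitted above for an uncompressed source: move four
// 16-bit code units per iteration while at least four remain (two word-sized loads and
// stores in the generated code), then finish one unit at a time; compressed strings are
// instead widened byte-by-byte into 16-bit chars.
#include <cstdint>
#include <cstring>

static void CopyCharsModel(const uint16_t* src, uint16_t* dst, int count) {
  while (count >= 4) {
    std::memcpy(dst, src, 4 * sizeof(uint16_t));  // Mirrors the LDR/STR pairs in the main loop.
    src += 4;
    dst += 4;
    count -= 4;
  }
  while (count > 0) {  // Remainder path: one LDRH/STRH per character.
    *dst++ = *src++;
    --count;
  }
}
// --- End of editor's sketch. ---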
2367 
2368 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2369   CreateFPToIntLocations(allocator_, invoke);
2370 }
2371 
2372 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2373   ArmVIXLAssembler* const assembler = GetAssembler();
2374   const vixl32::Register out = OutputRegister(invoke);
2375   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2376   // we don't care about the sign bit anyway.
2377   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2378 
2379   __ Vmov(out, InputSRegisterAt(invoke, 0));
2380   // We don't care about the sign bit, so shift left.
2381   __ Lsl(out, out, 1);
2382   __ Eor(out, out, infinity);
2383   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2384 }
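// --- Editor's sketch (not part of the original file). ---
// The code above moves the float bits into a core register, shifts the sign bit out and
// XORs with kPositiveInfinityFloat << 1, so the result is zero exactly for +/-Inf.
// A C++ model, assuming the usual IEEE-754 binary32 representation:
#include <cstdint>
#include <cstring>

static bool IsFloatInfiniteModel(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));   // Same as the VMOV to a core register.
  return (bits << 1) == (0x7f800000u << 1);   // Sign bit discarded; NaNs do not match.
}
// --- End of editor's sketch. ---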
2385 
2386 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2387   CreateFPToIntLocations(allocator_, invoke);
2388 }
2389 
2390 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2391   ArmVIXLAssembler* const assembler = GetAssembler();
2392   const vixl32::Register out = OutputRegister(invoke);
2393   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2394   const vixl32::Register temp = temps.Acquire();
2395   // The highest 32 bits of double precision positive infinity separated into
2396   // two constants encodable as immediate operands.
2397   constexpr uint32_t infinity_high  = 0x7f000000U;
2398   constexpr uint32_t infinity_high2 = 0x00f00000U;
2399 
2400   static_assert((infinity_high | infinity_high2) ==
2401                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2402                 "The constants do not add up to the high 32 bits of double "
2403                 "precision positive infinity.");
2404   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2405   __ Eor(out, out, infinity_high);
2406   __ Eor(out, out, infinity_high2);
2407   // We don't care about the sign bit, so shift left.
2408   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2409   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2410 }
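// --- Editor's sketch (not part of the original file). ---
// 0x7f000000 | 0x00f00000 == 0x7ff00000, the high word of +Inf; the split exists only
// because each half is encodable as an ARM modified immediate. The ORR with the low word
// and the left shift that drops the sign-bit difference make the result zero exactly for
// +/-Inf. A C++ model of the same test:
#include <cstdint>
#include <cstring>

static bool IsDoubleInfiniteModel(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  uint32_t hi = static_cast<uint32_t>(bits >> 32);
  uint32_t lo = static_cast<uint32_t>(bits);
  // Zero iff the low word is zero and the high word matches 0x7ff00000 up to the sign bit.
  return (lo | ((hi ^ 0x7ff00000u) << 1)) == 0u;
}
// --- End of editor's sketch. ---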
2411 
2412 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2413   if (features_.HasARMv8AInstructions()) {
2414     CreateFPToFPLocations(allocator_, invoke);
2415   }
2416 }
2417 
2418 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2419   ArmVIXLAssembler* assembler = GetAssembler();
2420   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2421   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2422 }
2423 
2424 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2425   if (features_.HasARMv8AInstructions()) {
2426     CreateFPToFPLocations(allocator_, invoke);
2427   }
2428 }
2429 
2430 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2431   ArmVIXLAssembler* assembler = GetAssembler();
2432   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
2433   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2434 }
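// --- Editor's sketch (not part of the original file). ---
// VRINTP rounds toward +infinity and VRINTM toward -infinity, so a single instruction
// implements Math.ceil()/Math.floor(); without ARMv8 the locations builders above bail
// out and the call falls back to the runtime. The C++ equivalents of the two intrinsics:
#include <cmath>

static double CeilModel(double x)  { return std::ceil(x); }   // VRINTP.F64
static double FloorModel(double x) { return std::floor(x); }  // VRINTM.F64
// --- End of editor's sketch. ---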
2435 
2436 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2437   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2438   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2439       invoke,
2440       codegen_,
2441       LocationFrom(r0),
2442       LocationFrom(calling_convention.GetRegisterAt(0)));
2443 }
2444 
2445 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2446   IntrinsicVisitor::IntegerValueOfInfo info =
2447       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2448   LocationSummary* locations = invoke->GetLocations();
2449   ArmVIXLAssembler* const assembler = GetAssembler();
2450 
2451   vixl32::Register out = RegisterFrom(locations->Out());
2452   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2453   vixl32::Register temp = temps.Acquire();
2454   auto allocate_instance = [&]() {
2455     DCHECK(out.Is(InvokeRuntimeCallingConventionARMVIXL().GetRegisterAt(0)));
2456     codegen_->LoadIntrinsicDeclaringClass(out, invoke);
2457     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2458     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2459   };
2460   if (invoke->InputAt(0)->IsConstant()) {
2461     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2462     if (static_cast<uint32_t>(value - info.low) < info.length) {
2463       // Just embed the j.l.Integer in the code.
2464       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2465       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2466     } else {
2467       DCHECK(locations->CanCall());
2468       // Allocate and initialize a new j.l.Integer.
2469       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2470       // JIT object table.
2471       allocate_instance();
2472       __ Mov(temp, value);
2473       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
2474       // Class pointer and `value` final field stores require a barrier before publication.
2475       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2476     }
2477   } else {
2478     DCHECK(locations->CanCall());
2479     vixl32::Register in = RegisterFrom(locations->InAt(0));
2480     // Check bounds of our cache.
2481     __ Add(out, in, -info.low);
2482     __ Cmp(out, info.length);
2483     vixl32::Label allocate, done;
2484     __ B(hs, &allocate, /* is_far_target= */ false);
2485     // If the value is within the bounds, load the j.l.Integer directly from the array.
2486     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2487     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2488     assembler->MaybeUnpoisonHeapReference(out);
2489     __ B(&done);
2490     __ Bind(&allocate);
2491     // Otherwise allocate and initialize a new j.l.Integer.
2492     allocate_instance();
2493     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
2494     // Class pointer and `value` final field stores require a barrier before publication.
2495     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2496     __ Bind(&done);
2497   }
2498 }
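// --- Editor's sketch (not part of the original file). ---
// The generated code mirrors Integer.valueOf(): a value inside the boot-image
// IntegerCache window [low, low + length) is loaded from the pre-allocated array,
// anything else allocates and initializes a new boxed object. A C++ model; BoxedInt and
// the cache parameters are hypothetical stand-ins for the boot-image data:
struct BoxedInt { int value; };

static BoxedInt* ValueOfModel(int v, BoxedInt** cache, int low, unsigned length) {
  unsigned index = static_cast<unsigned>(v - low);  // Same unsigned range check as Add/Cmp/B(hs).
  if (index < length) {
    return cache[index];             // Fast path: load from the boot-image array.
  }
  BoxedInt* boxed = new BoxedInt();  // Slow path: runtime allocation.
  boxed->value = v;
  return boxed;  // The emitted code also issues a StoreStore barrier before publishing.
}
// --- End of editor's sketch. ---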
2499 
2500 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2501   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
2502 }
2503 
2504 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2505   ArmVIXLAssembler* assembler = GetAssembler();
2506   LocationSummary* locations = invoke->GetLocations();
2507 
2508   Location obj = locations->InAt(0);
2509   Location out = locations->Out();
2510 
2511   SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2512   codegen_->AddSlowPath(slow_path);
2513 
2514   if (gUseReadBarrier) {
2515     // Check self->GetWeakRefAccessEnabled().
2516     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2517     vixl32::Register temp = temps.Acquire();
2518     __ Ldr(temp,
2519            MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value()));
2520     __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled));
2521     __ B(ne, slow_path->GetEntryLabel());
2522   }
2523 
2524   {
2525     // Load the java.lang.ref.Reference class.
2526     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2527     vixl32::Register temp = temps.Acquire();
2528     codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
2529 
2530     // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
2531     MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
2532     DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
2533     DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
2534               IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
2535     __ Ldrh(temp, MemOperand(temp, disable_intrinsic_offset.Uint32Value()));
2536     __ Cmp(temp, 0);
2537     __ B(ne, slow_path->GetEntryLabel());
2538   }
2539 
2540   // Load the value from the field.
2541   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2542   if (gUseReadBarrier && kUseBakerReadBarrier) {
2543     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2544                                                     out,
2545                                                     RegisterFrom(obj),
2546                                                     referent_offset,
2547                                                     /*maybe_temp=*/ Location::NoLocation(),
2548                                                     /*needs_null_check=*/ true);
2549     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2550   } else {
2551     {
2552       vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2553       __ Ldr(RegisterFrom(out), MemOperand(RegisterFrom(obj), referent_offset));
2554       codegen_->MaybeRecordImplicitNullCheck(invoke);
2555     }
2556     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2557     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
2558   }
2559   __ Bind(slow_path->GetExitLabel());
2560 }
2561 
2562 void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2563   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
2564 }
2565 
2566 void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
2567   LocationSummary* locations = invoke->GetLocations();
2568   ArmVIXLAssembler* assembler = GetAssembler();
2569   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2570 
2571   vixl32::Register obj = RegisterFrom(locations->InAt(0));
2572   vixl32::Register other = RegisterFrom(locations->InAt(1));
2573   vixl32::Register out = RegisterFrom(locations->Out());
2574   vixl32::Register tmp = temps.Acquire();
2575 
2576   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
2577   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2578 
2579   {
2580     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2581     // Loading scratch register always uses 32-bit encoding.
2582     vixl::ExactAssemblyScope eas(assembler->GetVIXLAssembler(),
2583                                  vixl32::k32BitT32InstructionSizeInBytes);
2584     __ ldr(tmp, MemOperand(obj, referent_offset));
2585     codegen_->MaybeRecordImplicitNullCheck(invoke);
2586   }
2587   assembler->MaybeUnpoisonHeapReference(tmp);
2588   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
2589 
2590   if (gUseReadBarrier) {
2591     DCHECK(kUseBakerReadBarrier);
2592 
2593     vixl32::Label calculate_result;
2594     __ Subs(out, tmp, other);
2595     __ B(eq, &calculate_result);  // `out` is 0 if taken.
2596 
2597     // Check if the loaded reference is null.
2598     __ Cmp(tmp, 0);
2599     __ B(eq, &calculate_result);  // `out` is not 0 if taken.
2600 
2601     // For correct memory visibility, we need a barrier before loading the lock word
2602     // but we already have the barrier emitted for volatile load above which is sufficient.
2603 
2604     // Load the lockword and check if it is a forwarding address.
2605     static_assert(LockWord::kStateShift == 30u);
2606     static_assert(LockWord::kStateForwardingAddress == 3u);
2607     __ Ldr(tmp, MemOperand(tmp, monitor_offset));
2608     __ Cmp(tmp, Operand(0xc0000000));
2609     __ B(lo, &calculate_result);   // `out` is not 0 if taken.
2610 
2611     // Extract the forwarding address and subtract from `other`.
2612     __ Sub(out, other, Operand(tmp, LSL, LockWord::kForwardingAddressShift));
2613 
2614     __ Bind(&calculate_result);
2615   } else {
2616     DCHECK(!gUseReadBarrier);
2617     __ Sub(out, tmp, other);
2618   }
2619 
2620   // Convert 0 to 1 and non-zero to 0 for the Boolean result (`out = (out == 0)`).
2621   __ Clz(out, out);
2622   __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
2623 }
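// --- Editor's sketch (not part of the original file). ---
// The final Clz/Lsr pair turns "difference is zero" into the boolean result: CLZ yields
// 32 only for an input of 0, and a right shift by log2(32) = 5 maps 32 to 1 and every
// smaller count to 0. A portable C++ model of just that conversion:
#include <cstdint>

static int IsZeroModel(uint32_t difference) {
  int leading_zeros = 32;
  while (difference != 0) {  // The hardware CLZ does this in one instruction.
    difference >>= 1;
    --leading_zeros;
  }
  return leading_zeros >> 5;  // 32 >> 5 == 1; anything below 32 maps to 0.
}
// --- End of editor's sketch. ---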
2624 
2625 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2626   LocationSummary* locations =
2627       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2628   locations->SetOut(Location::RequiresRegister());
2629 }
2630 
2631 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2632   ArmVIXLAssembler* assembler = GetAssembler();
2633   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2634   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2635   __ Ldr(out, MemOperand(tr, offset));
2636   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2637   vixl32::Register temp = temps.Acquire();
2638   vixl32::Label done;
2639   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2640   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
2641   __ Dmb(vixl32::ISH);
2642   __ Mov(temp, 0);
2643   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
2644   __ Dmb(vixl32::ISH);
2645   if (done.IsReferenced()) {
2646     __ Bind(&done);
2647   }
2648 }
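// --- Editor's sketch (not part of the original file). ---
// Thread.interrupted() returns the per-thread flag and clears it; the fast path above
// avoids any barrier or store when the flag is already zero, and only the "was set" path
// pays for the DMB; clear; DMB sequence. A C++ sketch of the intended semantics (not the
// exact fences the backend emits):
#include <atomic>
#include <cstdint>

static bool InterruptedModel(std::atomic<int32_t>& interrupted_flag) {
  if (interrupted_flag.load(std::memory_order_relaxed) == 0) {
    return false;  // Fast path: no barrier, no store.
  }
  interrupted_flag.store(0, std::memory_order_seq_cst);  // Clear with full ordering.
  return true;
}
// --- End of editor's sketch. ---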
2649 
2650 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
2651   LocationSummary* locations =
2652       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2653   locations->SetInAt(0, Location::Any());
2654 }
2655 
2656 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2657 
2658 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2659   CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
2660 }
2661 
2662 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
2663   ArmVIXLAssembler* assembler = GetAssembler();
2664   LocationSummary* locations = invoke->GetLocations();
2665   vixl32::Register dividend = RegisterFrom(locations->InAt(0));
2666   vixl32::Register divisor = RegisterFrom(locations->InAt(1));
2667   vixl32::Register out = RegisterFrom(locations->Out());
2668 
2669   // Check if divisor is zero, bail to managed implementation to handle.
2670   SlowPathCodeARMVIXL* slow_path =
2671       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2672   codegen_->AddSlowPath(slow_path);
2673   __ CompareAndBranchIfZero(divisor, slow_path->GetEntryLabel());
2674 
2675   __ Udiv(out, dividend, divisor);
2676 
2677   __ Bind(slow_path->GetExitLabel());
2678 }
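// --- Editor's sketch (not part of the original file). ---
// Integer.divideUnsigned() maps directly onto UDIV; only a zero divisor needs the slow
// path, which calls the managed implementation so that the ArithmeticException is raised
// there. A C++ model of the fast/slow split (the exception is a stand-in):
#include <cstdint>
#include <stdexcept>

static uint32_t DivideUnsignedModel(uint32_t dividend, uint32_t divisor) {
  if (divisor == 0u) {
    throw std::runtime_error("divide by zero");  // Slow path in the generated code.
  }
  return dividend / divisor;  // Single UDIV on the fast path.
}
// --- End of editor's sketch. ---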
2679 
2680 static inline bool Use64BitExclusiveLoadStore(bool atomic, CodeGeneratorARMVIXL* codegen) {
2681   return atomic && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
2682 }
2683 
2684 static void GenerateIntrinsicGet(HInvoke* invoke,
2685                                  CodeGeneratorARMVIXL* codegen,
2686                                  DataType::Type type,
2687                                  std::memory_order order,
2688                                  bool atomic,
2689                                  vixl32::Register base,
2690                                  vixl32::Register offset,
2691                                  Location out,
2692                                  Location maybe_temp,
2693                                  Location maybe_temp2,
2694                                  Location maybe_temp3) {
2695   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2696   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
2697   DCHECK(acquire_barrier || order == std::memory_order_relaxed);
2698   DCHECK(atomic || order == std::memory_order_relaxed);
2699 
2700   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2701   MemOperand address(base, offset);
2702   switch (type) {
2703     case DataType::Type::kBool:
2704       __ Ldrb(RegisterFrom(out), address);
2705       break;
2706     case DataType::Type::kInt8:
2707       __ Ldrsb(RegisterFrom(out), address);
2708       break;
2709     case DataType::Type::kUint16:
2710       __ Ldrh(RegisterFrom(out), address);
2711       break;
2712     case DataType::Type::kInt16:
2713       __ Ldrsh(RegisterFrom(out), address);
2714       break;
2715     case DataType::Type::kInt32:
2716       __ Ldr(RegisterFrom(out), address);
2717       break;
2718     case DataType::Type::kInt64:
2719       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2720         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp);
2721         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2722         const vixl32::Register temp_reg = temps.Acquire();
2723         __ Add(temp_reg, base, offset);
2724         vixl32::Label loop;
2725         __ Bind(&loop);
2726         __ Ldrexd(LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2727         __ Strexd(strexd_tmp, LowRegisterFrom(out), HighRegisterFrom(out), MemOperand(temp_reg));
2728         __ Cmp(strexd_tmp, 0);
2729         __ B(ne, &loop);
2730       } else {
2731         __ Ldrd(LowRegisterFrom(out), HighRegisterFrom(out), address);
2732       }
2733       break;
2734     case DataType::Type::kReference:
2735       if (gUseReadBarrier && kUseBakerReadBarrier) {
2736         // Piggy-back on the field load path using introspection for the Baker read barrier.
2737         vixl32::Register temp = RegisterFrom(maybe_temp);
2738         __ Add(temp, base, offset);
2739         codegen->GenerateFieldLoadWithBakerReadBarrier(
2740             invoke, out, base, MemOperand(temp), /* needs_null_check= */ false);
2741       } else {
2742         __ Ldr(RegisterFrom(out), address);
2743       }
2744       break;
2745     case DataType::Type::kFloat32: {
2746       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2747       const vixl32::Register temp_reg = temps.Acquire();
2748       __ Add(temp_reg, base, offset);
2749       __ Vldr(SRegisterFrom(out), MemOperand(temp_reg));
2750       break;
2751     }
2752     case DataType::Type::kFloat64: {
2753       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2754       const vixl32::Register temp_reg = temps.Acquire();
2755       __ Add(temp_reg, base, offset);
2756       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
2757         vixl32::Register lo = RegisterFrom(maybe_temp);
2758         vixl32::Register hi = RegisterFrom(maybe_temp2);
2759         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
2760         vixl32::Label loop;
2761         __ Bind(&loop);
2762         __ Ldrexd(lo, hi, MemOperand(temp_reg));
2763         __ Strexd(strexd_tmp, lo, hi, MemOperand(temp_reg));
2764         __ Cmp(strexd_tmp, 0);
2765         __ B(ne, &loop);
2766         __ Vmov(DRegisterFrom(out), lo, hi);
2767       } else {
2768         __ Vldr(DRegisterFrom(out), MemOperand(temp_reg));
2769       }
2770       break;
2771     }
2772     default:
2773       LOG(FATAL) << "Unexpected type " << type;
2774       UNREACHABLE();
2775   }
2776   if (acquire_barrier) {
2777     codegen->GenerateMemoryBarrier(
2778         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
2779   }
2780   if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) {
2781     Location base_loc = LocationFrom(base);
2782     Location index_loc = LocationFrom(offset);
2783     codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
2784   }
2785 }
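// --- Editor's sketch (not part of the original file). ---
// On cores without single-copy-atomic LDRD, the kInt64/kFloat64 paths above read with an
// LDREXD/STREXD pair that writes back the value just loaded; the store-exclusive only
// succeeds if nothing intervened, which makes the 64-bit read atomic. A C++ model using
// the standard compare-exchange (which lowers to the same kind of loop on ARMv7):
#include <atomic>
#include <cstdint>

static uint64_t AtomicLoad64Model(std::atomic<uint64_t>& location) {
  uint64_t expected = 0;
  // Swap the current value with itself; on failure `expected` is refreshed and we retry.
  while (!location.compare_exchange_weak(expected, expected)) {
  }
  return expected;
}
// --- End of editor's sketch. ---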
2786 
2787 static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
2788   switch (intrinsic) {
2789     case Intrinsics::kUnsafeGetObject:
2790     case Intrinsics::kUnsafeGetObjectVolatile:
2791     case Intrinsics::kJdkUnsafeGetObject:
2792     case Intrinsics::kJdkUnsafeGetObjectVolatile:
2793     case Intrinsics::kJdkUnsafeGetObjectAcquire:
2794       return true;
2795     default:
2796       break;
2797   }
2798   return false;
2799 }
2800 
2801 static void CreateUnsafeGetLocations(HInvoke* invoke,
2802                                      CodeGeneratorARMVIXL* codegen,
2803                                      DataType::Type type,
2804                                      bool atomic) {
2805   bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
2806   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2807   LocationSummary* locations =
2808       new (allocator) LocationSummary(invoke,
2809                                       can_call
2810                                           ? LocationSummary::kCallOnSlowPath
2811                                           : LocationSummary::kNoCall,
2812                                       kIntrinsified);
2813   if (can_call && kUseBakerReadBarrier) {
2814     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2815   }
2816   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2817   locations->SetInAt(1, Location::RequiresRegister());
2818   locations->SetInAt(2, Location::RequiresRegister());
2819   locations->SetOut(Location::RequiresRegister(),
2820                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2821   if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2822       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2823     // We need a temporary register for the read barrier marking slow
2824     // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
2825     // or the STREXD result for LDREXD/STREXD sequence when LDRD is non-atomic.
2826     locations->AddTemp(Location::RequiresRegister());
2827   }
2828 }
2829 
2830 static void GenUnsafeGet(HInvoke* invoke,
2831                          CodeGeneratorARMVIXL* codegen,
2832                          DataType::Type type,
2833                          std::memory_order order,
2834                          bool atomic) {
2835   LocationSummary* locations = invoke->GetLocations();
2836   vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
2837   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
2838   Location out = locations->Out();
2839   Location maybe_temp = Location::NoLocation();
2840   if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
2841       (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
2842     maybe_temp = locations->GetTemp(0);
2843   }
2844   GenerateIntrinsicGet(invoke,
2845                        codegen,
2846                        type,
2847                        order,
2848                        atomic,
2849                        base,
2850                        offset,
2851                        out,
2852                        maybe_temp,
2853                        /*maybe_temp2=*/ Location::NoLocation(),
2854                        /*maybe_temp3=*/ Location::NoLocation());
2855 }
2856 
2857 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2858   VisitJdkUnsafeGet(invoke);
2859 }
2860 
2861 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
2862   VisitJdkUnsafeGet(invoke);
2863 }
2864 
2865 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2866   VisitJdkUnsafeGetVolatile(invoke);
2867 }
2868 
2869 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
2870   VisitJdkUnsafeGetVolatile(invoke);
2871 }
2872 
2873 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2874   VisitJdkUnsafeGetLong(invoke);
2875 }
2876 
2877 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
2878   VisitJdkUnsafeGetLong(invoke);
2879 }
2880 
2881 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2882   VisitJdkUnsafeGetLongVolatile(invoke);
2883 }
2884 
2885 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2886   VisitJdkUnsafeGetLongVolatile(invoke);
2887 }
2888 
2889 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2890   VisitJdkUnsafeGetObject(invoke);
2891 }
2892 
2893 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
2894   VisitJdkUnsafeGetObject(invoke);
2895 }
2896 
2897 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2898   VisitJdkUnsafeGetObjectVolatile(invoke);
2899 }
2900 
2901 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2902   VisitJdkUnsafeGetObjectVolatile(invoke);
2903 }
2904 
2905 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2906   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
2907 }
2908 
2909 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
2910   GenUnsafeGet(
2911       invoke, codegen_, DataType::Type::kInt32, std::memory_order_relaxed, /*atomic=*/ false);
2912 }
2913 
2914 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2915   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2916 }
2917 
2918 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2919   GenUnsafeGet(
2920       invoke, codegen_, DataType::Type::kInt32, std::memory_order_seq_cst, /*atomic=*/ true);
2921 }
2922 
2923 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2924   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
2925 }
2926 
2927 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2928   GenUnsafeGet(
2929       invoke, codegen_, DataType::Type::kInt32, std::memory_order_acquire, /*atomic=*/ true);
2930 }
2931 
2932 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2933   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
2934 }
2935 
2936 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2937   GenUnsafeGet(
2938       invoke, codegen_, DataType::Type::kInt64, std::memory_order_relaxed, /*atomic=*/ false);
2939 }
2940 
2941 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2942   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2943 }
2944 
2945 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2946   GenUnsafeGet(
2947       invoke, codegen_, DataType::Type::kInt64, std::memory_order_seq_cst, /*atomic=*/ true);
2948 }
2949 
2950 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2951   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
2952 }
2953 
2954 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2955   GenUnsafeGet(
2956       invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
2957 }
2958 
2959 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) {
2960   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
2961 }
2962 
2963 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) {
2964   GenUnsafeGet(
2965       invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
2966 }
2967 
2968 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
2969   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2970 }
2971 
2972 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
2973   GenUnsafeGet(
2974       invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
2975 }
2976 
2977 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
2978   CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
2979 }
2980 
2981 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
2982   GenUnsafeGet(
2983       invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
2984 }
2985 
2986 static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
2987                                  DataType::Type type,
2988                                  std::memory_order order,
2989                                  bool atomic,
2990                                  vixl32::Register base,
2991                                  vixl32::Register offset,
2992                                  Location value,
2993                                  Location maybe_temp,
2994                                  Location maybe_temp2,
2995                                  Location maybe_temp3) {
2996   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
2997   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
2998   DCHECK(release_barrier || order == std::memory_order_relaxed);
2999   DCHECK(atomic || order == std::memory_order_relaxed);
3000 
3001   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3002   if (release_barrier) {
3003     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3004   }
3005   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3006   if (kPoisonHeapReferences && type == DataType::Type::kReference) {
3007     vixl32::Register temp = temps.Acquire();
3008     __ Mov(temp, RegisterFrom(value));
3009     assembler->PoisonHeapReference(temp);
3010     value = LocationFrom(temp);
3011   }
3012   MemOperand address = offset.IsValid() ? MemOperand(base, offset) : MemOperand(base);
3013   if (offset.IsValid() && (DataType::Is64BitType(type) || type == DataType::Type::kFloat32)) {
3014     const vixl32::Register temp_reg = temps.Acquire();
3015     __ Add(temp_reg, base, offset);
3016     address = MemOperand(temp_reg);
3017   }
3018   switch (type) {
3019     case DataType::Type::kBool:
3020     case DataType::Type::kInt8:
3021       __ Strb(RegisterFrom(value), address);
3022       break;
3023     case DataType::Type::kUint16:
3024     case DataType::Type::kInt16:
3025       __ Strh(RegisterFrom(value), address);
3026       break;
3027     case DataType::Type::kReference:
3028     case DataType::Type::kInt32:
3029       __ Str(RegisterFrom(value), address);
3030       break;
3031     case DataType::Type::kInt64:
3032       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3033         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
3034         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
3035         vixl32::Label loop;
3036         __ Bind(&loop);
3037         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
3038         __ Strexd(lo_tmp, LowRegisterFrom(value), HighRegisterFrom(value), address);
3039         __ Cmp(lo_tmp, 0);
3040         __ B(ne, &loop);
3041       } else {
3042         __ Strd(LowRegisterFrom(value), HighRegisterFrom(value), address);
3043       }
3044       break;
3045     case DataType::Type::kFloat32:
3046       __ Vstr(SRegisterFrom(value), address);
3047       break;
3048     case DataType::Type::kFloat64:
3049       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3050         vixl32::Register lo_tmp = RegisterFrom(maybe_temp);
3051         vixl32::Register hi_tmp = RegisterFrom(maybe_temp2);
3052         vixl32::Register strexd_tmp = RegisterFrom(maybe_temp3);
3053         vixl32::Label loop;
3054         __ Bind(&loop);
3055         __ Ldrexd(lo_tmp, hi_tmp, address);  // Ignore the retrieved value.
3056         __ Vmov(lo_tmp, hi_tmp, DRegisterFrom(value));
3057         __ Strexd(strexd_tmp, lo_tmp, hi_tmp, address);
3058         __ Cmp(strexd_tmp, 0);
3059         __ B(ne, &loop);
3060       } else {
3061         __ Vstr(DRegisterFrom(value), address);
3062       }
3063       break;
3064     default:
3065       LOG(FATAL) << "Unexpected type " << type;
3066       UNREACHABLE();
3067   }
3068   if (seq_cst_barrier) {
3069     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3070   }
3071 }
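// --- Editor's sketch (not part of the original file). ---
// The barrier placement above is "fence before the store for release and seq_cst, plus a
// trailing full fence only for seq_cst" (kAnyStore before, kAnyAny after). A C++ model
// with explicit fences around a relaxed store:
#include <atomic>
#include <cstdint>

static void StoreModel(std::atomic<int32_t>& location, int32_t value,
                       bool release, bool seq_cst) {
  if (release || seq_cst) {
    std::atomic_thread_fence(std::memory_order_release);  // kAnyStore barrier.
  }
  location.store(value, std::memory_order_relaxed);
  if (seq_cst) {
    std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny barrier.
  }
}
// --- End of editor's sketch. ---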
3072 
3073 static void CreateUnsafePutLocations(HInvoke* invoke,
3074                                      CodeGeneratorARMVIXL* codegen,
3075                                      DataType::Type type,
3076                                      bool atomic) {
3077   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3078   LocationSummary* locations =
3079       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3080   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
3081   locations->SetInAt(1, Location::RequiresRegister());
3082   locations->SetInAt(2, Location::RequiresRegister());
3083   locations->SetInAt(3, Location::RequiresRegister());
3084 
3085   if (type == DataType::Type::kInt64) {
3086     // Potentially need temps for ldrexd-strexd loop.
3087     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
3088       locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
3089       locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
3090     }
3091   } else if (type == DataType::Type::kReference) {
3092     // Temp for card-marking.
3093     locations->AddTemp(Location::RequiresRegister());  // Temp.
3094   }
3095 }
3096 
3097 static void GenUnsafePut(HInvoke* invoke,
3098                          DataType::Type type,
3099                          std::memory_order order,
3100                          bool atomic,
3101                          CodeGeneratorARMVIXL* codegen) {
3102   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3103 
3104   LocationSummary* locations = invoke->GetLocations();
3105   vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
3106   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
3107   Location value = locations->InAt(3);
3108   Location maybe_temp = Location::NoLocation();
3109   Location maybe_temp2 = Location::NoLocation();
3110   if (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
3111     maybe_temp = locations->GetTemp(0);
3112     maybe_temp2 = locations->GetTemp(1);
3113   }
3114 
3115   GenerateIntrinsicSet(codegen,
3116                        type,
3117                        order,
3118                        atomic,
3119                        base,
3120                        offset,
3121                        value,
3122                        maybe_temp,
3123                        maybe_temp2,
3124                        /*maybe_temp3=*/ Location::NoLocation());
3125 
3126   if (type == DataType::Type::kReference) {
3127     vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3128     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3129     vixl32::Register card = temps.Acquire();
3130     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3131     codegen->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
3132   }
3133 }
3134 
3135 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3136   VisitJdkUnsafePut(invoke);
3137 }
3138 
3139 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
3140   VisitJdkUnsafePut(invoke);
3141 }
3142 
3143 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3144   VisitJdkUnsafePutOrdered(invoke);
3145 }
3146 
3147 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
3148   VisitJdkUnsafePutOrdered(invoke);
3149 }
3150 
3151 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3152   VisitJdkUnsafePutVolatile(invoke);
3153 }
3154 
3155 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
3156   VisitJdkUnsafePutVolatile(invoke);
3157 }
3158 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3159   VisitJdkUnsafePutObject(invoke);
3160 }
3161 
3162 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
3163   VisitJdkUnsafePutObject(invoke);
3164 }
3165 
3166 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3167   VisitJdkUnsafePutObjectOrdered(invoke);
3168 }
3169 
3170 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
3171   VisitJdkUnsafePutObjectOrdered(invoke);
3172 }
3173 
3174 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3175   VisitJdkUnsafePutObjectVolatile(invoke);
3176 }
3177 
3178 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
3179   VisitJdkUnsafePutObjectVolatile(invoke);
3180 }
3181 
3182 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3183   VisitJdkUnsafePutLong(invoke);
3184 }
3185 
3186 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
3187   VisitJdkUnsafePutLong(invoke);
3188 }
3189 
3190 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3191   VisitJdkUnsafePutLongOrdered(invoke);
3192 }
3193 
3194 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
3195   VisitJdkUnsafePutLongOrdered(invoke);
3196 }
3197 
3198 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3199   VisitJdkUnsafePutLongVolatile(invoke);
3200 }
3201 
3202 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
3203   VisitJdkUnsafePutLongVolatile(invoke);
3204 }
3205 
3206 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3207   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
3208 }
3209 
3210 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
3211   GenUnsafePut(invoke,
3212                DataType::Type::kInt32,
3213                std::memory_order_relaxed,
3214                /*atomic=*/ false,
3215                codegen_);
3216 }
3217 
3218 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3219   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3220 }
3221 
3222 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
3223   GenUnsafePut(invoke,
3224                DataType::Type::kInt32,
3225                std::memory_order_release,
3226                /*atomic=*/ true,
3227                codegen_);
3228 }
3229 
3230 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3231   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3232 }
3233 
3234 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
3235   GenUnsafePut(invoke,
3236                DataType::Type::kInt32,
3237                std::memory_order_seq_cst,
3238                /*atomic=*/ true,
3239                codegen_);
3240 }
3241 
3242 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3243   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
3244 }
3245 
3246 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
3247   GenUnsafePut(invoke,
3248                DataType::Type::kInt32,
3249                std::memory_order_release,
3250                /*atomic=*/ true,
3251                codegen_);
3252 }
3253 
3254 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) {
3255   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
3256 }
3257 
3258 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) {
3259   GenUnsafePut(invoke,
3260                DataType::Type::kReference,
3261                std::memory_order_relaxed,
3262                /*atomic=*/ false,
3263                codegen_);
3264 }
3265 
3266 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3267   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3268 }
3269 
3270 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
3271   GenUnsafePut(invoke,
3272                DataType::Type::kReference,
3273                std::memory_order_release,
3274                /*atomic=*/ true,
3275                codegen_);
3276 }
3277 
3278 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
3279   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3280 }
3281 
3282 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
3283   GenUnsafePut(invoke,
3284                DataType::Type::kReference,
3285                std::memory_order_seq_cst,
3286                /*atomic=*/ true,
3287                codegen_);
3288 }
3289 
3290 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
3291   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
3292 }
3293 
3294 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
3295   GenUnsafePut(invoke,
3296                DataType::Type::kReference,
3297                std::memory_order_release,
3298                /*atomic=*/ true,
3299                codegen_);
3300 }
3301 
3302 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3303   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ false);
3304 }
3305 
3306 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLong(HInvoke* invoke) {
3307   GenUnsafePut(invoke,
3308                DataType::Type::kInt64,
3309                std::memory_order_relaxed,
3310                /*atomic=*/ false,
3311                codegen_);
3312 }
3313 
3314 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3315   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3316 }
3317 
3318 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
3319   GenUnsafePut(invoke,
3320                DataType::Type::kInt64,
3321                std::memory_order_release,
3322                /*atomic=*/ true,
3323                codegen_);
3324 }
3325 
3326 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3327   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3328 }
3329 
3330 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
3331   GenUnsafePut(invoke,
3332                DataType::Type::kInt64,
3333                std::memory_order_seq_cst,
3334                /*atomic=*/ true,
3335                codegen_);
3336 }
3337 
3338 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3339   CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt64, /*atomic=*/ true);
3340 }
3341 
3342 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
3343   GenUnsafePut(invoke,
3344                DataType::Type::kInt64,
3345                std::memory_order_release,
3346                /*atomic=*/ true,
3347                codegen_);
3348 }
3349 
3350 static void EmitLoadExclusive(CodeGeneratorARMVIXL* codegen,
3351                               DataType::Type type,
3352                               vixl32::Register ptr,
3353                               Location old_value) {
3354   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3355   switch (type) {
3356     case DataType::Type::kBool:
3357     case DataType::Type::kInt8:
3358       __ Ldrexb(RegisterFrom(old_value), MemOperand(ptr));
3359       break;
3360     case DataType::Type::kUint16:
3361     case DataType::Type::kInt16:
3362       __ Ldrexh(RegisterFrom(old_value), MemOperand(ptr));
3363       break;
3364     case DataType::Type::kInt32:
3365     case DataType::Type::kReference:
3366       __ Ldrex(RegisterFrom(old_value), MemOperand(ptr));
3367       break;
3368     case DataType::Type::kInt64:
3369       __ Ldrexd(LowRegisterFrom(old_value), HighRegisterFrom(old_value), MemOperand(ptr));
3370       break;
3371     default:
3372       LOG(FATAL) << "Unexpected type: " << type;
3373       UNREACHABLE();
3374   }
3375   switch (type) {
3376     case DataType::Type::kInt8:
3377       __ Sxtb(RegisterFrom(old_value), RegisterFrom(old_value));
3378       break;
3379     case DataType::Type::kInt16:
3380       __ Sxth(RegisterFrom(old_value), RegisterFrom(old_value));
3381       break;
3382     case DataType::Type::kReference:
3383       assembler->MaybeUnpoisonHeapReference(RegisterFrom(old_value));
3384       break;
3385     default:
3386       break;
3387   }
3388 }
3389 
3390 static void EmitStoreExclusive(CodeGeneratorARMVIXL* codegen,
3391                                DataType::Type type,
3392                                vixl32::Register ptr,
3393                                vixl32::Register store_result,
3394                                Location new_value) {
3395   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3396   if (type == DataType::Type::kReference) {
3397     assembler->MaybePoisonHeapReference(RegisterFrom(new_value));
3398   }
3399   switch (type) {
3400     case DataType::Type::kBool:
3401     case DataType::Type::kInt8:
3402       __ Strexb(store_result, RegisterFrom(new_value), MemOperand(ptr));
3403       break;
3404     case DataType::Type::kUint16:
3405     case DataType::Type::kInt16:
3406       __ Strexh(store_result, RegisterFrom(new_value), MemOperand(ptr));
3407       break;
3408     case DataType::Type::kInt32:
3409     case DataType::Type::kReference:
3410       __ Strex(store_result, RegisterFrom(new_value), MemOperand(ptr));
3411       break;
3412     case DataType::Type::kInt64:
3413       __ Strexd(
3414           store_result, LowRegisterFrom(new_value), HighRegisterFrom(new_value), MemOperand(ptr));
3415       break;
3416     default:
3417       LOG(FATAL) << "Unexpected type: " << type;
3418       UNREACHABLE();
3419   }
3420   if (type == DataType::Type::kReference) {
3421     assembler->MaybeUnpoisonHeapReference(RegisterFrom(new_value));
3422   }
3423 }
3424 
3425 static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
3426                                   DataType::Type type,
3427                                   bool strong,
3428                                   vixl32::Label* cmp_failure,
3429                                   bool cmp_failure_is_far_target,
3430                                   vixl32::Register ptr,
3431                                   Location expected,
3432                                   Location new_value,
3433                                   Location old_value,
3434                                   vixl32::Register store_result,
3435                                   vixl32::Register success) {
3436   // For kReference, the `expected` shall be a register pair when called from a read barrier
3437   // slow path, holding both the original `expected` and the unmarked old value loaded by the
3438   // main path CAS attempt (which matched `expected` only after marking).
3439   // Otherwise, the type of `expected` shall match the type of `new_value` and `old_value`.
3440   if (type == DataType::Type::kInt64) {
3441     DCHECK(expected.IsRegisterPair());
3442     DCHECK(new_value.IsRegisterPair());
3443     DCHECK(old_value.IsRegisterPair());
3444   } else {
3445     DCHECK(expected.IsRegister() ||
3446            (type == DataType::Type::kReference && expected.IsRegisterPair()));
3447     DCHECK(new_value.IsRegister());
3448     DCHECK(old_value.IsRegister());
3449     // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
3450     DCHECK(!expected.Contains(LocationFrom(store_result)));
3451   }
3452 
3453   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3454 
3455   // do {
3456   //   old_value = [ptr];  // Load exclusive.
3457   //   if (old_value != expected) goto cmp_failure;
3458   //   store_result = failed([ptr] <- new_value);  // Store exclusive.
3459   // } while (strong && store_result);
3460   //
3461   // If `success` is a valid register, there are additional instructions in the above code
3462   // to report success with value 1 and failure with value 0 in that register.
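  //
  // For illustration, for an Int32 strong CAS where `success` is valid and does not overlap
  // `old_value` or `store_result`, the emitted loop is roughly (register names are placeholders):
  //
  //   loop_head:
  //     ldrex   old_value, [ptr]
  //     mov     success, #0
  //     cmp     old_value, expected
  //     bne     cmp_failure
  //     strex   store_result, new_value, [ptr]
  //     mov     success, #1
  //     cmp     store_result, #0
  //     bne     loop_head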
3463 
3464   vixl32::Label loop_head;
3465   if (strong) {
3466     __ Bind(&loop_head);
3467   }
3468   EmitLoadExclusive(codegen, type, ptr, old_value);
3469   // We do not need to initialize the failure code for comparison failure if the
3470   // branch goes to the read barrier slow path that clobbers `success` anyway.
3471   bool init_failure_for_cmp =
3472       success.IsValid() &&
3473       !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
3474   // Instruction scheduling: Loading a constant between LDREX* and using the loaded value
3475   // is essentially free, so prepare the failure value here if we can.
3476   bool init_failure_for_cmp_early =
3477       init_failure_for_cmp && !old_value.Contains(LocationFrom(success));
3478   if (init_failure_for_cmp_early) {
3479     __ Mov(success, 0);  // Indicate failure if the comparison fails.
3480   }
3481   if (type == DataType::Type::kInt64) {
3482     __ Cmp(LowRegisterFrom(old_value), LowRegisterFrom(expected));
3483     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3484     __ it(eq);
3485     __ cmp(eq, HighRegisterFrom(old_value), HighRegisterFrom(expected));
3486   } else if (expected.IsRegisterPair()) {
3487     DCHECK_EQ(type, DataType::Type::kReference);
3488     DCHECK(!expected.Contains(old_value));
3489     // Check if the loaded value matches either of the two registers in `expected`.
3490     __ Cmp(RegisterFrom(old_value), LowRegisterFrom(expected));
3491     ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3492     __ it(ne);
3493     __ cmp(ne, RegisterFrom(old_value), HighRegisterFrom(expected));
3494   } else {
3495     __ Cmp(RegisterFrom(old_value), RegisterFrom(expected));
3496   }
3497   if (init_failure_for_cmp && !init_failure_for_cmp_early) {
3498     __ Mov(LeaveFlags, success, 0);  // Indicate failure if the comparison fails.
3499   }
3500   __ B(ne, cmp_failure, /*is_far_target=*/ cmp_failure_is_far_target);
3501   EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
3502   if (strong) {
3503     // Instruction scheduling: Loading a constant between STREX* and using its result
3504     // is essentially free, so prepare the success value here if needed and possible.
3505     if (success.IsValid() && !success.Is(store_result)) {
3506       __ Mov(success, 1);  // Indicate success if the store succeeds.
3507     }
3508     __ Cmp(store_result, 0);
3509     if (success.IsValid() && success.Is(store_result)) {
3510       __ Mov(LeaveFlags, success, 1);  // Indicate success if the store succeeds.
3511     }
3512     __ B(ne, &loop_head, /*is_far_target=*/ false);
3513   } else {
3514     // Weak CAS (VarHandle.weakCompareAndSet variants) does not retry and must always report
3515     // success or failure.
3515     DCHECK(success.IsValid());
3516     // Flip the `store_result` to indicate success by 1 and failure by 0.
3517     __ Eor(success, store_result, 1);
3518   }
3519 }
3520 
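// Slow path for a reference CAS when read barriers are enabled. It marks the old value loaded
// by the main path and, if the marked value matches `expected`, retries the CAS accepting
// either the to-space reference (`expected`) or the unmarked from-space old value.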
3521 class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
3522  public:
3523   explicit ReadBarrierCasSlowPathARMVIXL(HInvoke* invoke,
3524                                          bool strong,
3525                                          vixl32::Register base,
3526                                          vixl32::Register offset,
3527                                          vixl32::Register expected,
3528                                          vixl32::Register new_value,
3529                                          vixl32::Register old_value,
3530                                          vixl32::Register old_value_temp,
3531                                          vixl32::Register store_result,
3532                                          vixl32::Register success,
3533                                          CodeGeneratorARMVIXL* arm_codegen)
3534       : SlowPathCodeARMVIXL(invoke),
3535         strong_(strong),
3536         base_(base),
3537         offset_(offset),
3538         expected_(expected),
3539         new_value_(new_value),
3540         old_value_(old_value),
3541         old_value_temp_(old_value_temp),
3542         store_result_(store_result),
3543         success_(success),
3544         mark_old_value_slow_path_(nullptr),
3545         update_old_value_slow_path_(nullptr) {
3546     if (!kUseBakerReadBarrier) {
3547       // We need to add the slow path now; it is too late to add it when emitting slow path code.
3548       mark_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3549           invoke,
3550           Location::RegisterLocation(old_value_temp.GetCode()),
3551           Location::RegisterLocation(old_value.GetCode()),
3552           Location::RegisterLocation(base.GetCode()),
3553           /*offset=*/ 0u,
3554           /*index=*/ Location::RegisterLocation(offset.GetCode()));
3555       if (!success.IsValid()) {
3556         update_old_value_slow_path_ = arm_codegen->AddReadBarrierSlowPath(
3557             invoke,
3558             Location::RegisterLocation(old_value.GetCode()),
3559             Location::RegisterLocation(old_value_temp.GetCode()),
3560             Location::RegisterLocation(base.GetCode()),
3561             /*offset=*/ 0u,
3562             /*index=*/ Location::RegisterLocation(offset.GetCode()));
3563       }
3564     }
3565   }
3566 
3567   const char* GetDescription() const override { return "ReadBarrierCasSlowPathARMVIXL"; }
3568 
3569   void EmitNativeCode(CodeGenerator* codegen) override {
3570     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
3571     ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
3572     __ Bind(GetEntryLabel());
3573 
3574     // Mark the `old_value_` from the main path and compare with `expected_`.
3575     if (kUseBakerReadBarrier) {
3576       DCHECK(mark_old_value_slow_path_ == nullptr);
3577       arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
3578     } else {
3579       DCHECK(mark_old_value_slow_path_ != nullptr);
3580       __ B(mark_old_value_slow_path_->GetEntryLabel());
3581       __ Bind(mark_old_value_slow_path_->GetExitLabel());
3582     }
3583     __ Cmp(old_value_temp_, expected_);
3584     if (success_.IsValid()) {
3585       __ Mov(LeaveFlags, success_, 0);  // Indicate failure if we take the branch out.
3586     } else {
3587       // In case of failure, update the `old_value_` with the marked reference.
3588       ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
3589       __ it(ne);
3590       __ mov(ne, old_value_, old_value_temp_);
3591     }
3592     __ B(ne, GetExitLabel());
3593 
3594     // The old value we have read did not match `expected` (which is always a to-space
3595     // reference) but after the read barrier the marked to-space value matched, so the
3596     // old value must be a from-space reference to the same object. Do the same CAS loop
3597     // as the main path but check for both `expected` and the unmarked old value
3598     // representing the to-space and from-space references for the same object.
3599 
3600     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3601     vixl32::Register tmp_ptr = temps.Acquire();
3602 
3603     // Recalculate the `tmp_ptr` clobbered above.
3604     __ Add(tmp_ptr, base_, offset_);
3605 
3606     vixl32::Label mark_old_value;
3607     GenerateCompareAndSet(arm_codegen,
3608                           DataType::Type::kReference,
3609                           strong_,
3610                           /*cmp_failure=*/ success_.IsValid() ? GetExitLabel() : &mark_old_value,
3611                           /*cmp_failure_is_far_target=*/ success_.IsValid(),
3612                           tmp_ptr,
3613                           /*expected=*/ LocationFrom(expected_, old_value_),
3614                           /*new_value=*/ LocationFrom(new_value_),
3615                           /*old_value=*/ LocationFrom(old_value_temp_),
3616                           store_result_,
3617                           success_);
3618     if (!success_.IsValid()) {
3619       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
3620       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
3621       __ Mov(old_value_, expected_);
3622     }
3623 
3624     __ B(GetExitLabel());
3625 
3626     if (!success_.IsValid()) {
3627       __ Bind(&mark_old_value);
3628       if (kUseBakerReadBarrier) {
3629         DCHECK(update_old_value_slow_path_ == nullptr);
3630         arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
3631       } else {
3632         // Note: We could redirect the `failure` above directly to the entry label and bind
3633         // the exit label in the main path, but the main path would need to access the
3634         // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
3635         DCHECK(update_old_value_slow_path_ != nullptr);
3636         __ B(update_old_value_slow_path_->GetEntryLabel());
3637         __ Bind(update_old_value_slow_path_->GetExitLabel());
3638       }
3639       __ B(GetExitLabel());
3640     }
3641   }
3642 
3643  private:
3644   bool strong_;
3645   vixl32::Register base_;
3646   vixl32::Register offset_;
3647   vixl32::Register expected_;
3648   vixl32::Register new_value_;
3649   vixl32::Register old_value_;
3650   vixl32::Register old_value_temp_;
3651   vixl32::Register store_result_;
3652   vixl32::Register success_;
3653   SlowPathCodeARMVIXL* mark_old_value_slow_path_;
3654   SlowPathCodeARMVIXL* update_old_value_slow_path_;
3655 };
3656 
3657 static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3658   const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
3659   LocationSummary* locations =
3660       new (allocator) LocationSummary(invoke,
3661                                       can_call
3662                                           ? LocationSummary::kCallOnSlowPath
3663                                           : LocationSummary::kNoCall,
3664                                       kIntrinsified);
3665   if (can_call && kUseBakerReadBarrier) {
3666     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3667   }
3668   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
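  // The remaining inputs are the base object, the 64-bit offset, the expected value and
  // the new value; they are all required in registers for the LDREX/STREX loop.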
3669   locations->SetInAt(1, Location::RequiresRegister());
3670   locations->SetInAt(2, Location::RequiresRegister());
3671   locations->SetInAt(3, Location::RequiresRegister());
3672   locations->SetInAt(4, Location::RequiresRegister());
3673 
3674   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3675 
3676   // Temporary register used in CAS. In the object case (UnsafeCASObject intrinsic),
3677   // this is also used for card-marking, and possibly for read barrier.
3678   locations->AddTemp(Location::RequiresRegister());
3679 }
3680 
3681 static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
3682   DCHECK_NE(type, DataType::Type::kInt64);
3683 
3684   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3685   LocationSummary* locations = invoke->GetLocations();
3686 
3687   vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
3688   vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
3689   vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
3690   vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
3691   vixl32::Register new_value = InputRegisterAt(invoke, 4);            // New value.
3692 
3693   vixl32::Register tmp = RegisterFrom(locations->GetTemp(0));         // Temporary.
3694 
3695   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3696   vixl32::Register tmp_ptr = temps.Acquire();
3697 
3698   if (type == DataType::Type::kReference) {
3699     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
3700     // object and scan the receiver at the next GC for nothing.
3701     bool value_can_be_null = true;  // TODO: Worth finding out this information?
3702     codegen->MarkGCCard(tmp_ptr, tmp, base, new_value, value_can_be_null);
3703   }
3704 
3705   vixl32::Label exit_loop_label;
3706   vixl32::Label* exit_loop = &exit_loop_label;
3707   vixl32::Label* cmp_failure = &exit_loop_label;
3708 
3709   if (gUseReadBarrier && type == DataType::Type::kReference) {
3710     // If marking, check if the stored reference is a from-space reference to the same
3711     // object as the to-space reference `expected`. If so, perform a custom CAS loop.
3712     ReadBarrierCasSlowPathARMVIXL* slow_path =
3713         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
3714             invoke,
3715             /*strong=*/ true,
3716             base,
3717             offset,
3718             expected,
3719             new_value,
3720             /*old_value=*/ tmp,
3721             /*old_value_temp=*/ out,
3722             /*store_result=*/ out,
3723             /*success=*/ out,
3724             codegen);
3725     codegen->AddSlowPath(slow_path);
3726     exit_loop = slow_path->GetExitLabel();
3727     cmp_failure = slow_path->GetEntryLabel();
3728   }
3729 
3730   // Unsafe CAS operations have std::memory_order_seq_cst semantics.
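  // This is implemented by emitting a full memory barrier both before and after the
  // LDREX/STREX loop below.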
3731   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3732   __ Add(tmp_ptr, base, offset);
3733   GenerateCompareAndSet(codegen,
3734                         type,
3735                         /*strong=*/ true,
3736                         cmp_failure,
3737                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
3738                         tmp_ptr,
3739                         /*expected=*/ LocationFrom(expected),  // TODO: Int64
3740                         /*new_value=*/ LocationFrom(new_value),  // TODO: Int64
3741                         /*old_value=*/ LocationFrom(tmp),  // TODO: Int64
3742                         /*store_result=*/ tmp,
3743                         /*success=*/ out);
3744   __ Bind(exit_loop);
3745   codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
3746 
3747   if (type == DataType::Type::kReference) {
3748     codegen->MaybeGenerateMarkingRegisterCheck(/*code=*/ 128, /*temp_loc=*/ LocationFrom(tmp_ptr));
3749   }
3750 }
3751 
3752 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3753   VisitJdkUnsafeCASInt(invoke);
3754 }
3755 void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3756   VisitJdkUnsafeCASObject(invoke);
3757 }
3758 
3759 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3760   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3761   VisitJdkUnsafeCompareAndSetInt(invoke);
3762 }
3763 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3764   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3765   VisitJdkUnsafeCompareAndSetObject(invoke);
3766 }
3767 
3768 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3769   CreateUnsafeCASLocations(allocator_, invoke);
3770 }
3771 void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
3772   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3773   if (gUseReadBarrier && !kUseBakerReadBarrier) {
3774     return;
3775   }
3776 
3777   CreateUnsafeCASLocations(allocator_, invoke);
3778 }
3779 
3780 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
3781   VisitJdkUnsafeCASInt(invoke);
3782 }
3783 void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
3784   VisitJdkUnsafeCASObject(invoke);
3785 }
3786 
3787 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
3788   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
3789   VisitJdkUnsafeCompareAndSetInt(invoke);
3790 }
3791 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
3792   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
3793   VisitJdkUnsafeCompareAndSetObject(invoke);
3794 }
3795 
3796 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
3797   GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
3798 }
3799 void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
3800   // The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
3801   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
3802 
3803   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
3804 }
3805 
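// Operation selector for the GenerateGetAndUpdate() loop below: kSet corresponds to getAndSet,
// kAdd to getAndAdd (with kAddWithByteSwap used for byte array views accessed in non-native
// byte order), and kAnd/kOr/kXor to the getAndBitwiseAnd/Or/Xor access modes.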
3806 enum class GetAndUpdateOp {
3807   kSet,
3808   kAdd,
3809   kAddWithByteSwap,
3810   kAnd,
3811   kOr,
3812   kXor
3813 };
3814 
3815 static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
3816                                  GetAndUpdateOp get_and_update_op,
3817                                  DataType::Type load_store_type,
3818                                  vixl32::Register ptr,
3819                                  Location arg,
3820                                  Location old_value,
3821                                  vixl32::Register store_result,
3822                                  Location maybe_temp,
3823                                  Location maybe_vreg_temp) {
3824   ArmVIXLAssembler* assembler = codegen->GetAssembler();
3825 
3826   Location loaded_value;
3827   Location new_value;
3828   switch (get_and_update_op) {
3829     case GetAndUpdateOp::kSet:
3830       loaded_value = old_value;
3831       new_value = arg;
3832       break;
3833     case GetAndUpdateOp::kAddWithByteSwap:
3834       if (old_value.IsRegisterPair()) {
3835         // To avoid register overlap when reversing bytes, load into temps.
3836         DCHECK(maybe_temp.IsRegisterPair());
3837         loaded_value = maybe_temp;
3838         new_value = loaded_value;  // Use the same temporaries for the new value.
3839         break;
3840       }
3841       FALLTHROUGH_INTENDED;
3842     case GetAndUpdateOp::kAdd:
3843       if (old_value.IsFpuRegisterPair()) {
3844         DCHECK(maybe_temp.IsRegisterPair());
3845         loaded_value = maybe_temp;
3846         new_value = loaded_value;  // Use the same temporaries for the new value.
3847         break;
3848       }
3849       if (old_value.IsFpuRegister()) {
3850         DCHECK(maybe_temp.IsRegister());
3851         loaded_value = maybe_temp;
3852         new_value = loaded_value;  // Use the same temporary for the new value.
3853         break;
3854       }
3855       FALLTHROUGH_INTENDED;
3856     case GetAndUpdateOp::kAnd:
3857     case GetAndUpdateOp::kOr:
3858     case GetAndUpdateOp::kXor:
3859       loaded_value = old_value;
3860       new_value = maybe_temp;
3861       break;
3862   }
3863 
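  // LDREX/STREX loop: load the old value exclusively, compute the new value according to
  // `get_and_update_op`, and retry until the store-exclusive succeeds.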
3864   vixl32::Label loop_label;
3865   __ Bind(&loop_label);
3866   EmitLoadExclusive(codegen, load_store_type, ptr, loaded_value);
3867   switch (get_and_update_op) {
3868     case GetAndUpdateOp::kSet:
3869       break;
3870     case GetAndUpdateOp::kAddWithByteSwap:
3871       if (arg.IsFpuRegisterPair()) {
3872         GenerateReverseBytes(assembler, DataType::Type::kFloat64, loaded_value, old_value);
3873         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3874         __ Vadd(sum, DRegisterFrom(old_value), DRegisterFrom(arg));
3875         __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), sum);  // Swap low/high.
3876       } else if (arg.IsFpuRegister()) {
3877         GenerateReverseBytes(assembler, DataType::Type::kFloat32, loaded_value, old_value);
3878         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3879         __ Vadd(sum, SRegisterFrom(old_value), SRegisterFrom(arg));
3880         __ Vmov(RegisterFrom(new_value), sum);
3881       } else if (load_store_type == DataType::Type::kInt64) {
3882         GenerateReverseBytes(assembler, DataType::Type::kInt64, loaded_value, old_value);
3883         // Swap low/high registers for the addition results.
3884         __ Adds(HighRegisterFrom(new_value), LowRegisterFrom(old_value), LowRegisterFrom(arg));
3885         __ Adc(LowRegisterFrom(new_value), HighRegisterFrom(old_value), HighRegisterFrom(arg));
3886       } else {
3887         GenerateReverseBytes(assembler, DataType::Type::kInt32, loaded_value, old_value);
3888         __ Add(RegisterFrom(new_value), RegisterFrom(old_value), RegisterFrom(arg));
3889       }
3890       if (load_store_type == DataType::Type::kInt64) {
3891         // The `new_value` already has the high and low word swapped. Reverse bytes in each.
3892         GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
3893       } else {
3894         GenerateReverseBytes(assembler, load_store_type, new_value, new_value);
3895       }
3896       break;
3897     case GetAndUpdateOp::kAdd:
3898       if (arg.IsFpuRegisterPair()) {
3899         vixl32::DRegister old_value_vreg = DRegisterFrom(old_value);
3900         vixl32::DRegister sum = DRegisterFrom(maybe_vreg_temp);
3901         __ Vmov(old_value_vreg, LowRegisterFrom(loaded_value), HighRegisterFrom(loaded_value));
3902         __ Vadd(sum, old_value_vreg, DRegisterFrom(arg));
3903         __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), sum);
3904       } else if (arg.IsFpuRegister()) {
3905         vixl32::SRegister old_value_vreg = SRegisterFrom(old_value);
3906         vixl32::SRegister sum = LowSRegisterFrom(maybe_vreg_temp);  // The temporary is a pair.
3907         __ Vmov(old_value_vreg, RegisterFrom(loaded_value));
3908         __ Vadd(sum, old_value_vreg, SRegisterFrom(arg));
3909         __ Vmov(RegisterFrom(new_value), sum);
3910       } else if (load_store_type == DataType::Type::kInt64) {
3911         __ Adds(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3912         __ Adc(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3913       } else {
3914         __ Add(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3915       }
3916       break;
3917     case GetAndUpdateOp::kAnd:
3918       if (load_store_type == DataType::Type::kInt64) {
3919         __ And(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3920         __ And(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3921       } else {
3922         __ And(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3923       }
3924       break;
3925     case GetAndUpdateOp::kOr:
3926       if (load_store_type == DataType::Type::kInt64) {
3927         __ Orr(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3928         __ Orr(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3929       } else {
3930         __ Orr(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3931       }
3932       break;
3933     case GetAndUpdateOp::kXor:
3934       if (load_store_type == DataType::Type::kInt64) {
3935         __ Eor(LowRegisterFrom(new_value), LowRegisterFrom(loaded_value), LowRegisterFrom(arg));
3936         __ Eor(HighRegisterFrom(new_value), HighRegisterFrom(loaded_value), HighRegisterFrom(arg));
3937       } else {
3938         __ Eor(RegisterFrom(new_value), RegisterFrom(loaded_value), RegisterFrom(arg));
3939       }
3940       break;
3941   }
3942   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value);
3943   __ Cmp(store_result, 0);
3944   __ B(ne, &loop_label);
3945 }
3946 
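// Slow path shared by the VarHandle intrinsics below. Besides the default entry used when a
// check fails, it has a separate entry for the byte array view case; that code either jumps
// back to the main path at the native byte order label or emits a byte-swapping access.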
3947 class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
3948  public:
3949   VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
3950       : IntrinsicSlowPathARMVIXL(invoke),
3951         order_(order),
3952         atomic_(false),
3953         return_success_(false),
3954         strong_(false),
3955         get_and_update_op_(GetAndUpdateOp::kAdd) {
3956   }
3957 
3958   vixl32::Label* GetByteArrayViewCheckLabel() {
3959     return &byte_array_view_check_label_;
3960   }
3961 
3962   vixl32::Label* GetNativeByteOrderLabel() {
3963     return &native_byte_order_label_;
3964   }
3965 
3966   void SetAtomic(bool atomic) {
3967     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGet ||
3968            GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kSet);
3969     atomic_ = atomic;
3970   }
3971 
3972   void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
3973     if (return_success) {
3974       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
3975     } else {
3976       DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
3977     }
3978     return_success_ = return_success;
3979     strong_ = strong;
3980   }
3981 
3982   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3983     DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
3984     get_and_update_op_ = get_and_update_op;
3985   }
3986 
3987   void EmitNativeCode(CodeGenerator* codegen_in) override {
3988     if (GetByteArrayViewCheckLabel()->IsReferenced()) {
3989       EmitByteArrayViewCode(codegen_in);
3990     }
3991     IntrinsicSlowPathARMVIXL::EmitNativeCode(codegen_in);
3992   }
3993 
3994  private:
3995   HInvoke* GetInvoke() const {
3996     return GetInstruction()->AsInvoke();
3997   }
3998 
3999   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
4000     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
4001   }
4002 
4003   void EmitByteArrayViewCode(CodeGenerator* codegen_in);
4004 
4005   vixl32::Label byte_array_view_check_label_;
4006   vixl32::Label native_byte_order_label_;
4007   // Shared parameter for all VarHandle intrinsics.
4008   std::memory_order order_;
4009   // Extra argument for GenerateVarHandleGet() and GenerateVarHandleSet().
4010   bool atomic_;
4011   // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
4012   bool return_success_;
4013   bool strong_;
4014   // Extra argument for GenerateVarHandleGetAndUpdate().
4015   GetAndUpdateOp get_and_update_op_;
4016 };
4017 
4018 // Generate subtype check without read barriers.
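// The check walks the superclass chain of `object`'s class looking for `type`; reaching a null
// superclass branches to the slow path. Without a read barrier this can produce false negatives
// (e.g. for unmarked from-space references), which the callers handle in the slow path.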
4019 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARMVIXL* codegen,
4020                                                     SlowPathCodeARMVIXL* slow_path,
4021                                                     vixl32::Register object,
4022                                                     vixl32::Register type,
4023                                                     bool object_can_be_null = true) {
4024   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4025 
4026   const MemberOffset class_offset = mirror::Object::ClassOffset();
4027   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
4028 
4029   vixl32::Label success;
4030   if (object_can_be_null) {
4031     __ CompareAndBranchIfZero(object, &success, /*is_far_target=*/ false);
4032   }
4033 
4034   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4035   vixl32::Register temp = temps.Acquire();
4036 
4037   __ Ldr(temp, MemOperand(object, class_offset.Int32Value()));
4038   assembler->MaybeUnpoisonHeapReference(temp);
4039   vixl32::Label loop;
4040   __ Bind(&loop);
4041   __ Cmp(type, temp);
4042   __ B(eq, &success, /*is_far_target=*/ false);
4043   __ Ldr(temp, MemOperand(temp, super_class_offset.Int32Value()));
4044   assembler->MaybeUnpoisonHeapReference(temp);
4045   __ Cmp(temp, 0);
4046   __ B(eq, slow_path->GetEntryLabel());
4047   __ B(&loop);
4048   __ Bind(&success);
4049 }
4050 
4051 // Check access mode and the primitive type from VarHandle.varType.
4052 // Check reference arguments against the VarHandle.varType; for references this is a subclass
4053 // check without read barrier, so it can have false negatives which we handle in the slow path.
4054 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
4055                                                         CodeGeneratorARMVIXL* codegen,
4056                                                         SlowPathCodeARMVIXL* slow_path,
4057                                                         DataType::Type type) {
4058   mirror::VarHandle::AccessMode access_mode =
4059       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
4060   Primitive::Type primitive_type = DataTypeToPrimitive(type);
4061 
4062   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4063   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4064 
4065   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
4066   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
4067   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4068 
4069   // Use the temporary register reserved for offset. It is not used yet at this point.
4070   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4071   vixl32::Register var_type_no_rb =
4072       RegisterFrom(invoke->GetLocations()->GetTemp(expected_coordinates_count == 0u ? 1u : 0u));
4073 
4074   // Check that the operation is permitted and that the primitive type of varhandle.varType
4075   // matches the expected type. We do not need a read barrier when loading a reference only to
4076   // load a constant primitive field through that reference. Use LDRD to load the fields together.
4077   {
4078     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4079     vixl32::Register temp2 = temps.Acquire();
4080     DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
4081     __ Ldrd(var_type_no_rb, temp2, MemOperand(varhandle, var_type_offset.Int32Value()));
4082     assembler->MaybeUnpoisonHeapReference(var_type_no_rb);
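    // The access mode is permitted only if its bit is set in accessModesBitMask (loaded into
    // `temp2`); a zero TST result sends us to the slow path.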
4083     __ Tst(temp2, 1u << static_cast<uint32_t>(access_mode));
4084     __ B(eq, slow_path->GetEntryLabel());
4085     __ Ldrh(temp2, MemOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
4086     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4087     __ B(ne, slow_path->GetEntryLabel());
4088   }
4089 
4090   if (type == DataType::Type::kReference) {
4091     // Check reference arguments against the varType.
4092     // False negatives due to varType being an interface or array type
4093     // or due to the missing read barrier are handled by the slow path.
4094     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4095     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4096     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4097       HInstruction* arg = invoke->InputAt(arg_index);
4098       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
4099       if (!arg->IsNullConstant()) {
4100         vixl32::Register arg_reg = RegisterFrom(invoke->GetLocations()->InAt(arg_index));
4101         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
4102       }
4103     }
4104   }
4105 }
4106 
4107 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
4108                                               CodeGeneratorARMVIXL* codegen,
4109                                               SlowPathCodeARMVIXL* slow_path) {
4110   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4111   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4112 
4113   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4114 
4115   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4116   vixl32::Register temp = temps.Acquire();
4117 
4118   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
4119   // Do not emit read barrier (or unpoison the reference) for comparing to null.
4120   __ Ldr(temp, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4121   __ Cmp(temp, 0);
4122   __ B(ne, slow_path->GetEntryLabel());
4123 }
4124 
4125 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
4126                                                  CodeGeneratorARMVIXL* codegen,
4127                                                  SlowPathCodeARMVIXL* slow_path) {
4128   VarHandleOptimizations optimizations(invoke);
4129   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4130   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4131   vixl32::Register object = InputRegisterAt(invoke, 1);
4132 
4133   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4134   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4135 
4136   // Null-check the object.
4137   if (!optimizations.GetSkipObjectNullCheck()) {
4138     __ Cmp(object, 0);
4139     __ B(eq, slow_path->GetEntryLabel());
4140   }
4141 
4142   if (!optimizations.GetUseKnownBootImageVarHandle()) {
4143     // Use the first temporary register, whether it's for the declaring class or the offset.
4144     // It is not used yet at this point.
4145     vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4146 
4147     // Check that the VarHandle references an instance field by checking that
4148     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
4149     // type compatibility check with the source object's type, which will fail for null.
4150     {
4151       UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4152       vixl32::Register temp2 = temps.Acquire();
4153       DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4154       __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4155       assembler->MaybeUnpoisonHeapReference(temp);
4156       // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4157       __ Cmp(temp2, 0);
4158       __ B(ne, slow_path->GetEntryLabel());
4159     }
4160 
4161     // Check that the object has the correct type.
4162     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
4163     GenerateSubTypeObjectCheckNoReadBarrier(
4164         codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
4165   }
4166 }
4167 
4168 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
4169                                          CodeGeneratorARMVIXL* codegen,
4170                                          VarHandleSlowPathARMVIXL* slow_path) {
4171   VarHandleOptimizations optimizations(invoke);
4172   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4173   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4174   vixl32::Register object = InputRegisterAt(invoke, 1);
4175   vixl32::Register index = InputRegisterAt(invoke, 2);
4176   DataType::Type value_type =
4177       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4178   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
4179 
4180   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
4181   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
4182   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
4183   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
4184   const MemberOffset class_offset = mirror::Object::ClassOffset();
4185   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
4186 
4187   // Null-check the object.
4188   if (!optimizations.GetSkipObjectNullCheck()) {
4189     __ Cmp(object, 0);
4190     __ B(eq, slow_path->GetEntryLabel());
4191   }
4192 
4193   // Use the offset temporary register. It is not used yet at this point.
4194   vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
4195 
4196   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4197   vixl32::Register temp2 = temps.Acquire();
4198 
4199   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
4200   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
4201   // coordinateType0 shall not be null but we do not explicitly verify that.
4202   DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
4203   __ Ldrd(temp, temp2, MemOperand(varhandle, coordinate_type0_offset.Int32Value()));
4204   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
4205   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
4206   __ Cmp(temp2, 0);
4207   __ B(eq, slow_path->GetEntryLabel());
4208 
4209   // Check object class against componentType0.
4210   //
4211   // This is an exact check and we defer other cases to the runtime. This includes
4212   // conversion to array of superclass references, which is valid but subsequently
4213   // requires all update operations to check that the value can indeed be stored.
4214   // We do not want to perform such extra checks in the intrinsified code.
4215   //
4216   // We do this check without read barrier, so there can be false negatives which we
4217   // defer to the slow path. There shall be no false negatives for array classes in the
4218   // boot image (including Object[] and primitive arrays) because they are non-movable.
4219   __ Ldr(temp2, MemOperand(object, class_offset.Int32Value()));
4220   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4221   __ Cmp(temp, temp2);
4222   __ B(ne, slow_path->GetEntryLabel());
4223 
4224   // Check that the coordinateType0 is an array type. We do not need a read barrier
4225   // for loading constant reference fields (or chains of them) for comparison with null,
4226   // nor for finally loading a constant primitive field (primitive type) below.
4227   __ Ldr(temp2, MemOperand(temp, component_type_offset.Int32Value()));
4228   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
4229   __ Cmp(temp2, 0);
4230   __ B(eq, slow_path->GetEntryLabel());
4231 
4232   // Check that the array component type matches the primitive type.
4233   // With the exception of `kPrimNot`, `kPrimByte` and `kPrimBoolean`,
4234   // we shall check for a byte array view in the slow path.
4235   // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
4236   // so we cannot emit that if we're JITting without boot image.
4237   bool boot_image_available =
4238       codegen->GetCompilerOptions().IsBootImage() ||
4239       !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
4240   bool can_be_view =
4241       ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
4242       boot_image_available;
4243   vixl32::Label* slow_path_label =
4244       can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
4245   __ Ldrh(temp2, MemOperand(temp2, primitive_type_offset.Int32Value()));
4246   __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
4247   __ B(ne, slow_path_label);
4248 
4249   // Check for array index out of bounds.
4250   __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
4251   __ Cmp(index, temp);
4252   __ B(hs, slow_path->GetEntryLabel());
4253 }
4254 
4255 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4256                                               CodeGeneratorARMVIXL* codegen,
4257                                               VarHandleSlowPathARMVIXL* slow_path) {
4258   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4259   if (expected_coordinates_count == 0u) {
4260     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4261   } else if (expected_coordinates_count == 1u) {
4262     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4263   } else {
4264     DCHECK_EQ(expected_coordinates_count, 2u);
4265     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4266   }
4267 }
4268 
4269 static VarHandleSlowPathARMVIXL* GenerateVarHandleChecks(HInvoke* invoke,
4270                                                          CodeGeneratorARMVIXL* codegen,
4271                                                          std::memory_order order,
4272                                                          DataType::Type type) {
4273   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4274   VarHandleOptimizations optimizations(invoke);
4275   if (optimizations.GetUseKnownBootImageVarHandle()) {
4276     DCHECK_NE(expected_coordinates_count, 2u);
4277     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4278       return nullptr;
4279     }
4280   }
4281 
4282   VarHandleSlowPathARMVIXL* slow_path =
4283       new (codegen->GetScopedAllocator()) VarHandleSlowPathARMVIXL(invoke, order);
4284   codegen->AddSlowPath(slow_path);
4285 
4286   if (!optimizations.GetUseKnownBootImageVarHandle()) {
4287     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4288   }
4289   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4290 
4291   return slow_path;
4292 }
4293 
4294 struct VarHandleTarget {
4295   vixl32::Register object;  // The object holding the value to operate on.
4296   vixl32::Register offset;  // The offset of the value to operate on.
4297 };
4298 
4299 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4300   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4301   LocationSummary* locations = invoke->GetLocations();
4302 
4303   VarHandleTarget target;
4304   // The temporary allocated for loading the offset.
4305   target.offset = RegisterFrom(locations->GetTemp(0u));
4306   // The reference to the object that holds the value to operate on.
4307   target.object = (expected_coordinates_count == 0u)
4308       ? RegisterFrom(locations->GetTemp(1u))
4309       : InputRegisterAt(invoke, 1);
4310   return target;
4311 }
4312 
4313 static void GenerateVarHandleTarget(HInvoke* invoke,
4314                                     const VarHandleTarget& target,
4315                                     CodeGeneratorARMVIXL* codegen) {
4316   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4317   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
4318   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4319 
4320   if (expected_coordinates_count <= 1u) {
4321     if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) {
4322       ScopedObjectAccess soa(Thread::Current());
4323       ArtField* target_field = GetBootImageVarHandleField(invoke);
4324       if (expected_coordinates_count == 0u) {
4325         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4326         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4327           uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
4328           codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
4329         } else {
4330           codegen->LoadTypeForBootImageIntrinsic(
4331               target.object,
4332               TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
4333         }
4334       }
4335       __ Mov(target.offset, target_field->GetOffset().Uint32Value());
4336     } else {
4337       // For static fields, we need to fill the `target.object` with the declaring class,
4338       // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
4339       // we do not need the declaring class, so we can forget the `ArtField*` once
4340       // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
4341       vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
4342 
4343       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4344       const MemberOffset offset_offset = ArtField::OffsetOffset();
4345 
4346       // Load the ArtField, the offset and, if needed, declaring class.
4347       __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
4348       __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
4349       if (expected_coordinates_count == 0u) {
4350         codegen->GenerateGcRootFieldLoad(invoke,
4351                                          LocationFrom(target.object),
4352                                          field,
4353                                          ArtField::DeclaringClassOffset().Int32Value(),
4354                                          gCompilerReadBarrierOption);
4355       }
4356     }
4357   } else {
4358     DCHECK_EQ(expected_coordinates_count, 2u);
4359     DataType::Type value_type =
4360         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4361     uint32_t size_shift = DataType::SizeShift(value_type);
4362     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4363 
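    // The offset within the array object is data_offset + (index << size_shift).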
4364     vixl32::Register index = InputRegisterAt(invoke, 2);
4365     vixl32::Register shifted_index = index;
4366     if (size_shift != 0u) {
4367       shifted_index = target.offset;
4368       __ Lsl(shifted_index, index, size_shift);
4369     }
4370     __ Add(target.offset, shifted_index, data_offset.Int32Value());
4371   }
4372 }
4373 
4374 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4375   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4376   DataType::Type return_type = invoke->GetType();
4377 
4378   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4379   LocationSummary* locations =
4380       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4381   locations->SetInAt(0, Location::RequiresRegister());
4382   // Require coordinates in registers. These are the object holding the value
4383   // to operate on (except for static fields) and index (for arrays and views).
4384   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4385     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4386   }
4387   if (return_type != DataType::Type::kVoid) {
4388     if (DataType::IsFloatingPointType(return_type)) {
4389       locations->SetOut(Location::RequiresFpuRegister());
4390     } else {
4391       locations->SetOut(Location::RequiresRegister());
4392     }
4393   }
4394   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4395   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4396   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4397     HInstruction* arg = invoke->InputAt(arg_index);
4398     if (DataType::IsFloatingPointType(arg->GetType())) {
4399       locations->SetInAt(arg_index, Location::RequiresFpuRegister());
4400     } else {
4401       locations->SetInAt(arg_index, Location::RequiresRegister());
4402     }
4403   }
4404 
4405   // Add a temporary for offset.
4406   if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
4407       GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4408     // To preserve the offset value across the non-Baker read barrier slow path
4409     // for loading the declaring class, use a fixed callee-save register.
4410     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4411     locations->AddTemp(Location::RegisterLocation(first_callee_save));
4412   } else {
4413     locations->AddTemp(Location::RequiresRegister());
4414   }
4415   if (expected_coordinates_count == 0u) {
4416     // Add a temporary to hold the declaring class.
4417     locations->AddTemp(Location::RequiresRegister());
4418   }
4419 
4420   return locations;
4421 }
4422 
4423 static void CreateVarHandleGetLocations(HInvoke* invoke,
4424                                         CodeGeneratorARMVIXL* codegen,
4425                                         bool atomic) {
4426   VarHandleOptimizations optimizations(invoke);
4427   if (optimizations.GetDoNotIntrinsify()) {
4428     return;
4429   }
4430 
4431   if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
4432       invoke->GetType() == DataType::Type::kReference &&
4433       invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
4434       invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
4435     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4436     // the passed reference and reloads it from the field. This gets the memory visibility
4437     // wrong for Acquire/Volatile operations. b/173104084
4438     return;
4439   }
4440 
4441   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4442 
4443   DataType::Type type = invoke->GetType();
4444   if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
4445     // We need 3 temporaries for GenerateIntrinsicGet() but we can reuse the
4446     // declaring class (if present) and offset temporary.
4447     DCHECK_EQ(locations->GetTempCount(),
4448               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4449     locations->AddRegisterTemps(3u - locations->GetTempCount());
4450   }
4451 }
4452 
4453 static void GenerateVarHandleGet(HInvoke* invoke,
4454                                  CodeGeneratorARMVIXL* codegen,
4455                                  std::memory_order order,
4456                                  bool atomic,
4457                                  bool byte_swap = false) {
4458   DataType::Type type = invoke->GetType();
4459   DCHECK_NE(type, DataType::Type::kVoid);
4460 
4461   LocationSummary* locations = invoke->GetLocations();
4462   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4463   Location out = locations->Out();
4464 
4465   VarHandleTarget target = GetVarHandleTarget(invoke);
4466   VarHandleSlowPathARMVIXL* slow_path = nullptr;
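  // When `byte_swap` is true, this code is reached from the byte array view part of the slow
  // path, which has already performed the checks and computed the target.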
4467   if (!byte_swap) {
4468     slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
4469     GenerateVarHandleTarget(invoke, target, codegen);
4470     if (slow_path != nullptr) {
4471       slow_path->SetAtomic(atomic);
4472       __ Bind(slow_path->GetNativeByteOrderLabel());
4473     }
4474   }
4475 
4476   Location maybe_temp = Location::NoLocation();
4477   Location maybe_temp2 = Location::NoLocation();
4478   Location maybe_temp3 = Location::NoLocation();
4479   if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
4480     // Reuse the offset temporary.
4481     maybe_temp = LocationFrom(target.offset);
4482   } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4483     // Reuse the offset temporary and declaring class (if present).
4484     // The address is constructed in the scratch register before these temporaries are clobbered.
4485     maybe_temp = LocationFrom(target.offset);
4486     DCHECK(maybe_temp.Equals(locations->GetTemp(0)));
4487     if (type == DataType::Type::kFloat64) {
4488       maybe_temp2 = locations->GetTemp(1);
4489       maybe_temp3 = locations->GetTemp(2);
4490     }
4491   }
4492 
4493   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4494   Location loaded_value = out;
4495   DataType::Type load_type = type;
4496   if (byte_swap) {
4497     if (type == DataType::Type::kFloat64) {
4498       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4499         // Change load type to Int64 and promote `maybe_temp2` and `maybe_temp3` to `loaded_value`.
4500         loaded_value = LocationFrom(RegisterFrom(maybe_temp2), RegisterFrom(maybe_temp3));
4501         maybe_temp2 = Location::NoLocation();
4502         maybe_temp3 = Location::NoLocation();
4503       } else {
4504         // Use the offset temporary and the scratch register.
4505         loaded_value = LocationFrom(target.offset, temps.Acquire());
4506       }
4507       load_type = DataType::Type::kInt64;
4508     } else if (type == DataType::Type::kFloat32) {
4509       // Reuse the offset temporary.
4510       loaded_value = LocationFrom(target.offset);
4511       load_type = DataType::Type::kInt32;
4512     } else if (type == DataType::Type::kInt64) {
4513       // Swap the high and low registers and reverse the bytes in each after the load.
4514       loaded_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
4515     }
4516   }
4517 
4518   GenerateIntrinsicGet(invoke,
4519                        codegen,
4520                        load_type,
4521                        order,
4522                        atomic,
4523                        target.object,
4524                        target.offset,
4525                        loaded_value,
4526                        maybe_temp,
4527                        maybe_temp2,
4528                        maybe_temp3);
4529   if (byte_swap) {
4530     if (type == DataType::Type::kInt64) {
4531       GenerateReverseBytesInPlaceForEachWord(assembler, loaded_value);
4532     } else {
4533       GenerateReverseBytes(assembler, type, loaded_value, out);
4534     }
4535   }
4536 
4537   if (slow_path != nullptr) {
4538     DCHECK(!byte_swap);
4539     __ Bind(slow_path->GetExitLabel());
4540   }
4541 }
4542 
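// The visitors below map each get-style access mode to the arguments used above:
// get -> relaxed non-atomic, getOpaque -> relaxed atomic, getAcquire -> acquire atomic,
// getVolatile -> seq_cst atomic.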
4543 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4544   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ false);
4545 }
4546 
4547 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGet(HInvoke* invoke) {
4548   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4549 }
4550 
4551 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4552   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4553 }
4554 
4555 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetOpaque(HInvoke* invoke) {
4556   GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4557 }
4558 
4559 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4560   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4561 }
4562 
4563 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAcquire(HInvoke* invoke) {
4564   GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire, /*atomic=*/ true);
4565 }
4566 
4567 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4568   CreateVarHandleGetLocations(invoke, codegen_, /*atomic=*/ true);
4569 }
4570 
4571 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetVolatile(HInvoke* invoke) {
4572   GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4573 }
4574 
4575 static void CreateVarHandleSetLocations(HInvoke* invoke,
4576                                         CodeGeneratorARMVIXL* codegen,
4577                                         bool atomic) {
4578   VarHandleOptimizations optimizations(invoke);
4579   if (optimizations.GetDoNotIntrinsify()) {
4580     return;
4581   }
4582 
4583   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4584 
4585   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4586   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4587   if (DataType::Is64BitType(value_type)) {
4588     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4589     DCHECK_EQ(locations->GetTempCount(), (expected_coordinates_count == 0) ? 2u : 1u);
4590     HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
4591     bool has_reverse_bytes_slow_path =
4592         (expected_coordinates_count == 2u) &&
4593         !IsZeroBitPattern(arg);
4594     if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4595       // We need 4 temporaries in the byte array view slow path. Otherwise, we need
4596       // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
4597       // We can reuse the offset temporary and declaring class (if present).
4598       size_t temps_needed = has_reverse_bytes_slow_path
4599           ? 4u
4600           : ((value_type == DataType::Type::kFloat64) ? 3u : 2u);
4601       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4602     } else if (has_reverse_bytes_slow_path) {
4603       // We need 2 temps for the value with reversed bytes in the byte array view slow path.
4604       // We can reuse the offset temporary.
4605       DCHECK_EQ(locations->GetTempCount(), 1u);
4606       locations->AddTemp(Location::RequiresRegister());
4607     }
4608   }
4609 }
4610 
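// Code generation shared by the VarHandle set/setOpaque/setRelease/setVolatile intrinsics.
// As with the get path, `byte_swap` is only true in the byte array view slow path, in which
// case the value is byte-reversed into temporaries (or the scratch register) before the store.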
4611 static void GenerateVarHandleSet(HInvoke* invoke,
4612                                  CodeGeneratorARMVIXL* codegen,
4613                                  std::memory_order order,
4614                                  bool atomic,
4615                                  bool byte_swap = false) {
4616   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4617   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4618 
4619   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4620   LocationSummary* locations = invoke->GetLocations();
4621   Location value = locations->InAt(value_index);
4622 
4623   VarHandleTarget target = GetVarHandleTarget(invoke);
4624   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4625   if (!byte_swap) {
4626     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4627     GenerateVarHandleTarget(invoke, target, codegen);
4628     if (slow_path != nullptr) {
4629       slow_path->SetAtomic(atomic);
4630       __ Bind(slow_path->GetNativeByteOrderLabel());
4631     }
4632   }
4633 
4634   Location maybe_temp = Location::NoLocation();
4635   Location maybe_temp2 = Location::NoLocation();
4636   Location maybe_temp3 = Location::NoLocation();
4637   if (DataType::Is64BitType(value_type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
4638     // Reuse the offset temporary and declaring class (if present).
4639     // The address is constructed in the scratch register before these temporaries are clobbered.
4640     maybe_temp = locations->GetTemp(0);
4641     maybe_temp2 = locations->GetTemp(1);
4642     if (value_type == DataType::Type::kFloat64) {
4643       maybe_temp3 = locations->GetTemp(2);
4644     }
4645   }
4646 
4647   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4648   if (byte_swap) {
4649     if (DataType::Is64BitType(value_type) || value_type == DataType::Type::kFloat32) {
4650       // Calculate the address in scratch register, so that we can use the offset temporary.
4651       vixl32::Register base = temps.Acquire();
4652       __ Add(base, target.object, target.offset);
4653       target.object = base;
4654       target.offset = vixl32::Register();
4655     }
4656     Location original_value = value;
4657     if (DataType::Is64BitType(value_type)) {
4658       size_t temp_start = 0u;
4659       if (Use64BitExclusiveLoadStore(atomic, codegen)) {
4660         // Clear `maybe_temp3` which was initialized above for Float64.
4661         DCHECK_IMPLIES(value_type == DataType::Type::kFloat64,
4662                        maybe_temp3.Equals(locations->GetTemp(2)));
4663         maybe_temp3 = Location::NoLocation();
4664         temp_start = 2u;
4665       }
4666       value = LocationFrom(RegisterFrom(locations->GetTemp(temp_start)),
4667                            RegisterFrom(locations->GetTemp(temp_start + 1u)));
4668       if (value_type == DataType::Type::kFloat64) {
4669         __ Vmov(HighRegisterFrom(value), LowRegisterFrom(value), DRegisterFrom(original_value));
4670         GenerateReverseBytesInPlaceForEachWord(assembler, value);
4671         value_type = DataType::Type::kInt64;
4672       } else {
4673         GenerateReverseBytes(assembler, value_type, original_value, value);
4674       }
4675     } else if (value_type == DataType::Type::kFloat32) {
4676       value = locations->GetTemp(0);  // Use the offset temporary which was freed above.
4677       __ Vmov(RegisterFrom(value), SRegisterFrom(original_value));
4678       GenerateReverseBytes(assembler, DataType::Type::kInt32, value, value);
4679       value_type = DataType::Type::kInt32;
4680     } else {
4681       value = LocationFrom(temps.Acquire());
4682       GenerateReverseBytes(assembler, value_type, original_value, value);
4683     }
4684   }
4685 
4686   GenerateIntrinsicSet(codegen,
4687                        value_type,
4688                        order,
4689                        atomic,
4690                        target.object,
4691                        target.offset,
4692                        value,
4693                        maybe_temp,
4694                        maybe_temp2,
4695                        maybe_temp3);
4696 
4697   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
4698     // Reuse the offset temporary for MarkGCCard.
4699     vixl32::Register temp = target.offset;
4700     vixl32::Register card = temps.Acquire();
4701     vixl32::Register value_reg = RegisterFrom(value);
4702     codegen->MarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
4703   }
4704 
4705   if (slow_path != nullptr) {
4706     DCHECK(!byte_swap);
4707     __ Bind(slow_path->GetExitLabel());
4708   }
4709 }
4710 
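// The visitors below mirror the get visitors: set -> relaxed non-atomic, setOpaque -> relaxed
// atomic, setRelease -> release atomic, setVolatile -> seq_cst atomic.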
4711 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4712   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ false);
4713 }
4714 
4715 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSet(HInvoke* invoke) {
4716   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ false);
4717 }
4718 
4719 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4720   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4721 }
4722 
4723 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetOpaque(HInvoke* invoke) {
4724   GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed, /*atomic=*/ true);
4725 }
4726 
4727 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4728   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4729 }
4730 
4731 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetRelease(HInvoke* invoke) {
4732   GenerateVarHandleSet(invoke, codegen_, std::memory_order_release, /*atomic=*/ true);
4733 }
4734 
4735 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4736   CreateVarHandleSetLocations(invoke, codegen_, /*atomic=*/ true);
4737 }
4738 
4739 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
4740   // ARM store-release instructions are implicitly sequentially consistent.
4741   GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
4742 }
4743 
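// Locations for the compareAndSet/weakCompareAndSet* intrinsics (return_success == true) and
// the compareAndExchange* intrinsics (return_success == false). The former return only a
// success flag rather than the old value, which affects how many temporaries are needed.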
4744 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
4745   VarHandleOptimizations optimizations(invoke);
4746   if (optimizations.GetDoNotIntrinsify()) {
4747     return;
4748   }
4749 
4750   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4751   DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
4752   if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
4753       value_type == DataType::Type::kReference) {
4754     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
4755     // the passed reference and reloads it from the field. This breaks the read barriers
4756     // in slow path in different ways. The marked old value may not actually be a to-space
4757     // reference to the same object as `old_value`, breaking slow path assumptions. And
4758     // for CompareAndExchange, marking the old value after comparison failure may actually
4759     // return the reference to `expected`, erroneously indicating success even though we
4760     // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
4761     return;
4762   }
4763 
4764   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4765 
4766   if (gUseReadBarrier && !kUseBakerReadBarrier) {
4767     // We need callee-save registers for both the class object and offset instead of
4768     // the temporaries reserved in CreateVarHandleCommonLocations().
4769     static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
4770     constexpr int first_callee_save = CTZ(kArmCalleeSaveRefSpills);
4771     constexpr int second_callee_save = CTZ(kArmCalleeSaveRefSpills ^ (1u << first_callee_save));
4772     if (GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {  // For static fields.
4773       DCHECK_EQ(locations->GetTempCount(), 2u);
4774       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4775       DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
4776       locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
4777     } else {
4778       DCHECK_EQ(locations->GetTempCount(), 1u);
4779       DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
4780       locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
4781     }
4782   }
4783 
4784   if (DataType::IsFloatingPointType(value_type)) {
4785     // We can reuse the declaring class (if present) and offset temporary.
4786     DCHECK_EQ(locations->GetTempCount(),
4787               (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
4788     size_t temps_needed = (value_type == DataType::Type::kFloat64)
4789         ? (return_success ? 5u : 7u)
4790         : (return_success ? 3u : 4u);
4791     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
4792   } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
4793     // Add temps for the byte-reversed `expected` and `new_value` in the byte array view slow path.
4794     DCHECK_EQ(locations->GetTempCount(), 1u);
4795     if (value_type == DataType::Type::kInt64) {
4796       // We would ideally add 4 temps for Int64 but that would simply run out of registers,
4797       // so we instead need to reverse the bytes in the actual arguments and undo it at the end.
4798     } else {
4799       locations->AddRegisterTemps(2u);
4800     }
4801   }
4802   if (gUseReadBarrier && value_type == DataType::Type::kReference) {
4803     // Add a temporary for store result, also used for the `old_value_temp` in slow path.
4804     locations->AddTemp(Location::RequiresRegister());
4805   }
4806 }
4807 
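// Common code generation for the compare-and-set and compare-and-exchange intrinsics.
// The memory order maps to barriers around the generated compare-and-set loop: seq_cst
// emits kAnyAny both before and after, release emits kAnyStore before only, acquire emits
// kLoadAny after only, and relaxed emits no barriers.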
4808 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4809                                                      CodeGeneratorARMVIXL* codegen,
4810                                                      std::memory_order order,
4811                                                      bool return_success,
4812                                                      bool strong,
4813                                                      bool byte_swap = false) {
4814   DCHECK(return_success || strong);
4815 
4816   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
4817   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
4818   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4819   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
4820 
4821   ArmVIXLAssembler* assembler = codegen->GetAssembler();
4822   LocationSummary* locations = invoke->GetLocations();
4823   Location expected = locations->InAt(expected_index);
4824   Location new_value = locations->InAt(new_value_index);
4825   Location out = locations->Out();
4826 
4827   VarHandleTarget target = GetVarHandleTarget(invoke);
4828   VarHandleSlowPathARMVIXL* slow_path = nullptr;
4829   if (!byte_swap) {
4830     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
4831     GenerateVarHandleTarget(invoke, target, codegen);
4832     if (slow_path != nullptr) {
4833       slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
4834       __ Bind(slow_path->GetNativeByteOrderLabel());
4835     }
4836   }
4837 
4838   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
4839   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
4840   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
4841   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
4842 
4843   if (release_barrier) {
4844     codegen->GenerateMemoryBarrier(
4845         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
4846   }
4847 
4848   // Calculate the pointer to the value.
4849   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
4850   vixl32::Register tmp_ptr = temps.Acquire();
4851   __ Add(tmp_ptr, target.object, target.offset);
4852 
4853   // Move floating point values to temporaries and prepare output registers.
4854   // Note that float/double CAS uses bitwise comparison rather than operator==.
4855   // Reuse the declaring class (if present) and offset temporary for non-reference types;
4856   // the address has already been constructed in the scratch register. We are more careful
4857   // for references due to read and write barriers; see below.
4858   Location old_value;
4859   vixl32::Register store_result;
4860   vixl32::Register success = return_success ? RegisterFrom(out) : vixl32::Register();
4861   DataType::Type cas_type = value_type;
4862   if (value_type == DataType::Type::kFloat64) {
4863     vixl32::DRegister expected_vreg = DRegisterFrom(expected);
4864     vixl32::DRegister new_value_vreg = DRegisterFrom(new_value);
4865     expected =
4866         LocationFrom(RegisterFrom(locations->GetTemp(0)), RegisterFrom(locations->GetTemp(1)));
4867     new_value =
4868         LocationFrom(RegisterFrom(locations->GetTemp(2)), RegisterFrom(locations->GetTemp(3)));
4869     store_result = RegisterFrom(locations->GetTemp(4));
4870     old_value = return_success
4871         ? LocationFrom(success, store_result)
4872         : LocationFrom(RegisterFrom(locations->GetTemp(5)), RegisterFrom(locations->GetTemp(6)));
4873     if (byte_swap) {
4874       __ Vmov(HighRegisterFrom(expected), LowRegisterFrom(expected), expected_vreg);
4875       __ Vmov(HighRegisterFrom(new_value), LowRegisterFrom(new_value), new_value_vreg);
4876       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4877       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4878     } else {
4879       __ Vmov(LowRegisterFrom(expected), HighRegisterFrom(expected), expected_vreg);
4880       __ Vmov(LowRegisterFrom(new_value), HighRegisterFrom(new_value), new_value_vreg);
4881     }
4882     cas_type = DataType::Type::kInt64;
4883   } else if (value_type == DataType::Type::kFloat32) {
4884     vixl32::SRegister expected_vreg = SRegisterFrom(expected);
4885     vixl32::SRegister new_value_vreg = SRegisterFrom(new_value);
4886     expected = locations->GetTemp(0);
4887     new_value = locations->GetTemp(1);
4888     store_result = RegisterFrom(locations->GetTemp(2));
4889     old_value = return_success ? LocationFrom(store_result) : locations->GetTemp(3);
4890     __ Vmov(RegisterFrom(expected), expected_vreg);
4891     __ Vmov(RegisterFrom(new_value), new_value_vreg);
4892     if (byte_swap) {
4893       GenerateReverseBytes(assembler, DataType::Type::kInt32, expected, expected);
4894       GenerateReverseBytes(assembler, DataType::Type::kInt32, new_value, new_value);
4895     }
4896     cas_type = DataType::Type::kInt32;
4897   } else if (value_type == DataType::Type::kInt64) {
4898     store_result = RegisterFrom(locations->GetTemp(0));
4899     old_value = return_success
4900         ? LocationFrom(success, store_result)
4901         // If swapping bytes, swap the high/low regs and reverse the bytes in each after the load.
4902         : byte_swap ? LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out)) : out;
4903     if (byte_swap) {
4904       // Due to lack of registers, reverse bytes in `expected` and `new_value` and undo that later.
4905       GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4906       expected = LocationFrom(HighRegisterFrom(expected), LowRegisterFrom(expected));
4907       GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4908       new_value = LocationFrom(HighRegisterFrom(new_value), LowRegisterFrom(new_value));
4909     }
4910   } else {
4911     // Use the last temp. For references with read barriers, this is an extra temporary
4912     // allocated to avoid overwriting the temporaries for declaring class (if present)
4913     // and offset as they are needed in the slow path. Otherwise, this is the offset
4914     // temporary which also works for references without read barriers that need the
4915     // object register preserved for the write barrier.
4916     store_result = RegisterFrom(locations->GetTemp(locations->GetTempCount() - 1u));
4917     old_value = return_success ? LocationFrom(store_result) : out;
4918     if (byte_swap) {
4919       DCHECK_EQ(locations->GetTempCount(), 3u);
4920       Location original_expected = expected;
4921       Location original_new_value = new_value;
4922       expected = locations->GetTemp(0);
4923       new_value = locations->GetTemp(1);
4924       GenerateReverseBytes(assembler, value_type, original_expected, expected);
4925       GenerateReverseBytes(assembler, value_type, original_new_value, new_value);
4926     }
4927   }
4928 
4929   vixl32::Label exit_loop_label;
4930   vixl32::Label* exit_loop = &exit_loop_label;
4931   vixl32::Label* cmp_failure = &exit_loop_label;
4932 
4933   if (gUseReadBarrier && value_type == DataType::Type::kReference) {
4934     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
4935     // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
4936     vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
4937     // The slow path store result must not clobber `old_value`.
4938     vixl32::Register slow_path_store_result = old_value_temp;
4939     ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
4940         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
4941             invoke,
4942             strong,
4943             target.object,
4944             target.offset,
4945             RegisterFrom(expected),
4946             RegisterFrom(new_value),
4947             RegisterFrom(old_value),
4948             old_value_temp,
4949             slow_path_store_result,
4950             success,
4951             codegen);
4952     codegen->AddSlowPath(rb_slow_path);
4953     exit_loop = rb_slow_path->GetExitLabel();
4954     cmp_failure = rb_slow_path->GetEntryLabel();
4955   }
4956 
4957   GenerateCompareAndSet(codegen,
4958                         cas_type,
4959                         strong,
4960                         cmp_failure,
4961                         /*cmp_failure_is_far_target=*/ cmp_failure != &exit_loop_label,
4962                         tmp_ptr,
4963                         expected,
4964                         new_value,
4965                         old_value,
4966                         store_result,
4967                         success);
4968   __ Bind(exit_loop);
4969 
4970   if (acquire_barrier) {
4971     codegen->GenerateMemoryBarrier(
4972         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
4973   }
4974 
4975   if (byte_swap && value_type == DataType::Type::kInt64) {
4976     // Undo byte swapping in `expected` and `new_value`. We do not know whether
4977     // the values in these registers will be needed later.
4978     GenerateReverseBytesInPlaceForEachWord(assembler, expected);
4979     GenerateReverseBytesInPlaceForEachWord(assembler, new_value);
4980   }
4981   if (!return_success) {
4982     if (byte_swap) {
4983       if (value_type == DataType::Type::kInt64) {
4984         GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
4985       } else {
4986         GenerateReverseBytes(assembler, value_type, old_value, out);
4987       }
4988     } else if (value_type == DataType::Type::kFloat64) {
4989       __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
4990     } else if (value_type == DataType::Type::kFloat32) {
4991       __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
4992     }
4993   }
4994 
4995   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
4996     // Reuse the offset temporary and scratch register for MarkGCCard.
4997     vixl32::Register temp = target.offset;
4998     vixl32::Register card = tmp_ptr;
4999     // Mark card for object assuming new value is stored.
5000     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5001     codegen->MarkGCCard(temp, card, target.object, RegisterFrom(new_value), new_value_can_be_null);
5002   }
5003 
5004   if (slow_path != nullptr) {
5005     DCHECK(!byte_swap);
5006     __ Bind(slow_path->GetExitLabel());
5007   }
5008 }
5009 
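// CompareAndExchange* visitors use return_success == false (the old value is returned),
// while CompareAndSet and the WeakCompareAndSet* variants use return_success == true.
// The weak variants pass strong == false and are allowed to fail spuriously.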
5010 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5011   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5012 }
5013 
5014 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
5015   GenerateVarHandleCompareAndSetOrExchange(
5016       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
5017 }
5018 
5019 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5020   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5021 }
5022 
5023 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
5024   GenerateVarHandleCompareAndSetOrExchange(
5025       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
5026 }
5027 
5028 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5029   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
5030 }
5031 
5032 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
5033   GenerateVarHandleCompareAndSetOrExchange(
5034       invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
5035 }
5036 
5037 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5038   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5039 }
5040 
5041 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
5042   GenerateVarHandleCompareAndSetOrExchange(
5043       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
5044 }
5045 
5046 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5047   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5048 }
5049 
5050 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
5051   GenerateVarHandleCompareAndSetOrExchange(
5052       invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
5053 }
5054 
5055 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5056   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5057 }
5058 
5059 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
5060   GenerateVarHandleCompareAndSetOrExchange(
5061       invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
5062 }
5063 
5064 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5065   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5066 }
5067 
5068 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
5069   GenerateVarHandleCompareAndSetOrExchange(
5070       invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
5071 }
5072 
5073 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5074   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
5075 }
5076 
5077 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
5078   GenerateVarHandleCompareAndSetOrExchange(
5079       invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
5080 }
5081 
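// Locations for the getAndSet/getAndAdd/getAndBitwise* intrinsics. `get_and_update_op`
// selects the update applied in the loop (kSet, kAdd, kAnd, kOr or kXor); the temporaries
// added here depend on that operation, the value type and the read barrier configuration.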
5082 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
5083                                                  GetAndUpdateOp get_and_update_op) {
5084   VarHandleOptimizations optimizations(invoke);
5085   if (optimizations.GetDoNotIntrinsify()) {
5086     return;
5087   }
5088 
5089   if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
5090       invoke->GetType() == DataType::Type::kReference) {
5091     // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
5092     // the passed reference and reloads it from the field, thus seeing the new value
5093     // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
5094     return;
5095   }
5096 
5097   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
5098 
5099   // We can reuse the declaring class (if present) and offset temporary, except for
5100   // non-Baker read barriers that need them for the slow path.
5101   DCHECK_EQ(locations->GetTempCount(),
5102             (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
5103 
5104   DataType::Type value_type = invoke->GetType();
5105   if (get_and_update_op == GetAndUpdateOp::kSet) {
5106     if (DataType::IsFloatingPointType(value_type)) {
5107       // Add temps needed to do the GenerateGetAndUpdate() with core registers.
5108       size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
5109       locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5110     } else if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
5111                value_type == DataType::Type::kReference) {
5112       // We need to preserve the declaring class (if present) and offset for read barrier
5113       // slow paths, so we must use a separate temporary for the exclusive store result.
5114       locations->AddTemp(Location::RequiresRegister());
5115     } else if (GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5116       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5117       DCHECK_EQ(locations->GetTempCount(), 1u);
5118       locations->AddRegisterTemps((value_type == DataType::Type::kInt64) ? 2u : 1u);
5119     }
5120   } else {
5121     // We need temporaries for the new value and exclusive store result.
5122     size_t temps_needed = DataType::Is64BitType(value_type) ? 3u : 2u;
5123     if (get_and_update_op != GetAndUpdateOp::kAdd &&
5124         GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
5125       // Add temps for the byte-reversed `arg` in the byte array view slow path.
5126       if (value_type == DataType::Type::kInt64) {
5127         // We would ideally add 2 temps for Int64 but that would simply run out of registers,
5128         // so we instead need to reverse bytes in the actual argument and undo it at the end.
5129       } else {
5130         temps_needed += 1u;
5131       }
5132     }
5133     locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
5134     if (DataType::IsFloatingPointType(value_type)) {
5135       // Note: This shall allocate a D register. There is no way to request an S register.
5136       locations->AddTemp(Location::RequiresFpuRegister());
5137     }
5138   }
5139 }
5140 
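// Code generation shared by the getAndSet/getAndAdd/getAndBitwise* intrinsics. Floating-point
// getAndSet is performed in core registers to avoid moving between core and FP registers
// inside the loop; for getAndAdd in the byte array view slow path, the byte swapping is
// folded into the loop via GetAndUpdateOp::kAddWithByteSwap.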
5141 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5142                                           CodeGeneratorARMVIXL* codegen,
5143                                           GetAndUpdateOp get_and_update_op,
5144                                           std::memory_order order,
5145                                           bool byte_swap = false) {
5146   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
5147   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
5148 
5149   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5150   LocationSummary* locations = invoke->GetLocations();
5151   Location arg = locations->InAt(arg_index);
5152   Location out = locations->Out();
5153 
5154   VarHandleTarget target = GetVarHandleTarget(invoke);
5155   VarHandleSlowPathARMVIXL* slow_path = nullptr;
5156   if (!byte_swap) {
5157     slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
5158     GenerateVarHandleTarget(invoke, target, codegen);
5159     if (slow_path != nullptr) {
5160       slow_path->SetGetAndUpdateOp(get_and_update_op);
5161       __ Bind(slow_path->GetNativeByteOrderLabel());
5162     }
5163   }
5164 
5165   bool seq_cst_barrier = (order == std::memory_order_seq_cst);
5166   bool release_barrier = seq_cst_barrier || (order == std::memory_order_release);
5167   bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
5168   DCHECK(release_barrier || acquire_barrier || order == std::memory_order_relaxed);
5169 
5170   if (release_barrier) {
5171     codegen->GenerateMemoryBarrier(
5172         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kAnyStore);
5173   }
5174 
5175   // Use the scratch register for the pointer to the target location.
5176   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5177   vixl32::Register tmp_ptr = temps.Acquire();
5178   __ Add(tmp_ptr, target.object, target.offset);
5179 
5180   // Use the offset temporary for the exclusive store result.
5181   vixl32::Register store_result = target.offset;
5182 
5183   // The load/store type is never floating point.
5184   DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
5185       ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
5186       : value_type;
5187 
5188   // Prepare register for old value and temporaries if any.
5189   Location old_value = out;
5190   Location maybe_temp = Location::NoLocation();
5191   Location maybe_vreg_temp = Location::NoLocation();
5192   if (get_and_update_op == GetAndUpdateOp::kSet) {
5193     // For floating point GetAndSet, do the GenerateGetAndUpdate() with core registers,
5194     // rather than moving between core and FP registers in the loop.
5195     if (value_type == DataType::Type::kFloat64) {
5196       vixl32::DRegister arg_vreg = DRegisterFrom(arg);
5197       DCHECK_EQ(locations->GetTempCount(), 5u);  // `store_result` and the four here.
5198       old_value =
5199           LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)));
5200       arg = LocationFrom(RegisterFrom(locations->GetTemp(3)), RegisterFrom(locations->GetTemp(4)));
5201       if (byte_swap) {
5202         __ Vmov(HighRegisterFrom(arg), LowRegisterFrom(arg), arg_vreg);
5203         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5204       } else {
5205         __ Vmov(LowRegisterFrom(arg), HighRegisterFrom(arg), arg_vreg);
5206       }
5207     } else if (value_type == DataType::Type::kFloat32) {
5208       vixl32::SRegister arg_vreg = SRegisterFrom(arg);
5209       DCHECK_EQ(locations->GetTempCount(), 3u);  // `store_result` and the two here.
5210       old_value = locations->GetTemp(1);
5211       arg = locations->GetTemp(2);
5212       __ Vmov(RegisterFrom(arg), arg_vreg);
5213       if (byte_swap) {
5214         GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
5215       }
5216     } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
5217       if (kUseBakerReadBarrier) {
5218         // Load the old value initially to a temporary register.
5219         // We shall move it to `out` later with a read barrier.
5220         old_value = LocationFrom(store_result);
5221         store_result = RegisterFrom(out);  // Use the `out` for the exclusive store result.
5222       } else {
5223         // The store_result is a separate temporary.
5224         DCHECK(!store_result.Is(target.object));
5225         DCHECK(!store_result.Is(target.offset));
5226       }
5227     } else if (byte_swap) {
5228       Location original_arg = arg;
5229       arg = locations->GetTemp(1);
5230       if (value_type == DataType::Type::kInt64) {
5231         arg = LocationFrom(RegisterFrom(arg), RegisterFrom(locations->GetTemp(2)));
5232         // Swap the high/low regs and reverse the bytes in each after the load.
5233         old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5234       }
5235       GenerateReverseBytes(assembler, value_type, original_arg, arg);
5236     }
5237   } else {
5238     maybe_temp = DataType::Is64BitType(value_type)
5239         ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
5240         : locations->GetTemp(1);
5241     DCHECK(!maybe_temp.Contains(LocationFrom(store_result)));
5242     if (DataType::IsFloatingPointType(value_type)) {
5243       maybe_vreg_temp = locations->GetTemp(locations->GetTempCount() - 1u);
5244       DCHECK(maybe_vreg_temp.IsFpuRegisterPair());
5245     }
5246     if (byte_swap) {
5247       if (get_and_update_op == GetAndUpdateOp::kAdd) {
5248         // We need to do the byte swapping in the CAS loop for GetAndAdd.
5249         get_and_update_op = GetAndUpdateOp::kAddWithByteSwap;
5250       } else if (value_type == DataType::Type::kInt64) {
5251         // Swap the high/low regs and reverse the bytes in each after the load.
5252         old_value = LocationFrom(HighRegisterFrom(out), LowRegisterFrom(out));
5253         // Due to lack of registers, reverse bytes in `arg` and undo that later.
5254         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5255         arg = LocationFrom(HighRegisterFrom(arg), LowRegisterFrom(arg));
5256       } else {
5257         DCHECK(!DataType::IsFloatingPointType(value_type));
5258         Location original_arg = arg;
5259         arg = locations->GetTemp(2);
5260         DCHECK(!arg.Contains(LocationFrom(store_result)));
5261         GenerateReverseBytes(assembler, value_type, original_arg, arg);
5262       }
5263     }
5264   }
5265 
5266   GenerateGetAndUpdate(codegen,
5267                        get_and_update_op,
5268                        load_store_type,
5269                        tmp_ptr,
5270                        arg,
5271                        old_value,
5272                        store_result,
5273                        maybe_temp,
5274                        maybe_vreg_temp);
5275 
5276   if (acquire_barrier) {
5277     codegen->GenerateMemoryBarrier(
5278         seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
5279   }
5280 
5281   if (byte_swap && get_and_update_op != GetAndUpdateOp::kAddWithByteSwap) {
5282     if (value_type == DataType::Type::kInt64) {
5283       GenerateReverseBytesInPlaceForEachWord(assembler, old_value);
5284       if (get_and_update_op != GetAndUpdateOp::kSet) {
5285         // Undo byte swapping in `arg`. We do not know whether the values
5286         // in these registers will be needed later.
5287         GenerateReverseBytesInPlaceForEachWord(assembler, arg);
5288       }
5289     } else {
5290       GenerateReverseBytes(assembler, value_type, old_value, out);
5291     }
5292   } else if (get_and_update_op == GetAndUpdateOp::kSet &&
5293              DataType::IsFloatingPointType(value_type)) {
5294     if (value_type == DataType::Type::kFloat64) {
5295       __ Vmov(DRegisterFrom(out), LowRegisterFrom(old_value), HighRegisterFrom(old_value));
5296     } else {
5297       __ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
5298     }
5299   } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
5300     if (kUseBakerReadBarrier) {
5301       codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
5302                                                             RegisterFrom(old_value));
5303     } else {
5304       codegen->GenerateReadBarrierSlow(
5305           invoke,
5306           Location::RegisterLocation(RegisterFrom(out).GetCode()),
5307           Location::RegisterLocation(RegisterFrom(old_value).GetCode()),
5308           Location::RegisterLocation(target.object.GetCode()),
5309           /*offset=*/ 0u,
5310           /*index=*/ Location::RegisterLocation(target.offset.GetCode()));
5311     }
5312   }
5313 
5314   if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
5315     // Reuse the offset temporary and scratch register for MarkGCCard.
5316     vixl32::Register temp = target.offset;
5317     vixl32::Register card = tmp_ptr;
5318     // Mark card for object assuming new value is stored.
5319     bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
5320     codegen->MarkGCCard(temp, card, target.object, RegisterFrom(arg), new_value_can_be_null);
5321   }
5322 
5323   if (slow_path != nullptr) {
5324     DCHECK(!byte_swap);
5325     __ Bind(slow_path->GetExitLabel());
5326   }
5327 }
5328 
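// GetAndSet/GetAndAdd/GetAndBitwise* visitors: the plain forms use seq_cst, the -Acquire
// forms use acquire and the -Release forms use release memory order.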
5329 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5330   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5331 }
5332 
5333 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
5334   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
5335 }
5336 
5337 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5338   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5339 }
5340 
5341 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5342   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
5343 }
5344 
5345 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5346   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
5347 }
5348 
5349 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5350   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
5351 }
5352 
5353 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5354   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5355 }
5356 
5357 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5358   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
5359 }
5360 
5361 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5362   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5363 }
5364 
5365 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5366   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
5367 }
5368 
5369 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5370   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
5371 }
5372 
5373 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5374   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
5375 }
5376 
5377 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5378   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5379 }
5380 
5381 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5382   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
5383 }
5384 
5385 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5386   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5387 }
5388 
5389 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5390   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
5391 }
5392 
5393 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5394   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
5395 }
5396 
5397 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5398   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
5399 }
5400 
5401 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5402   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5403 }
5404 
5405 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5406   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
5407 }
5408 
5409 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5410   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5411 }
5412 
5413 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5414   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
5415 }
5416 
5417 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5418   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
5419 }
5420 
5421 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5422   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
5423 }
5424 
5425 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5426   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5427 }
5428 
5429 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5430   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
5431 }
5432 
5433 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5434   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5435 }
5436 
5437 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5438   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
5439 }
5440 
5441 void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5442   CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
5443 }
5444 
5445 void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5446   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
5447 }
5448 
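// Slow path taken when the main path found an array VarHandle whose element type does not
// match the value type: check that the handle is really a ByteArrayViewVarHandle, verify the
// index bounds and the alignment of the access, then return to the main path for native byte
// order or regenerate the access with byte_swap == true.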
5449 void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
5450   DCHECK(GetByteArrayViewCheckLabel()->IsReferenced());
5451   CodeGeneratorARMVIXL* codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_in);
5452   ArmVIXLAssembler* assembler = codegen->GetAssembler();
5453   HInvoke* invoke = GetInvoke();
5454   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5455   DataType::Type value_type =
5456       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5457   DCHECK_NE(value_type, DataType::Type::kReference);
5458   size_t size = DataType::Size(value_type);
5459   DCHECK_GT(size, 1u);
5460   vixl32::Operand size_operand(dchecked_integral_cast<int32_t>(size));
5461   vixl32::Register varhandle = InputRegisterAt(invoke, 0);
5462   vixl32::Register object = InputRegisterAt(invoke, 1);
5463   vixl32::Register index = InputRegisterAt(invoke, 2);
5464 
5465   MemberOffset class_offset = mirror::Object::ClassOffset();
5466   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5467   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5468   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5469 
5470   __ Bind(GetByteArrayViewCheckLabel());
5471 
5472   VarHandleTarget target = GetVarHandleTarget(invoke);
5473   {
5474     // Use the offset temporary register. It is not used yet at this point.
5475     vixl32::Register temp = RegisterFrom(invoke->GetLocations()->GetTemp(0u));
5476 
5477     UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
5478     vixl32::Register temp2 = temps.Acquire();
5479 
5480     // The main path checked that coordinateType0 is an array class matching the class of the
5481     // actual coordinate argument, but its component type does not match the value type.
5482     // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5483     __ Ldr(temp, MemOperand(varhandle, class_offset.Int32Value()));
5484     codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
5485     codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5486     __ Cmp(temp, temp2);
5487     __ B(ne, GetEntryLabel());
5488 
5489     // Check for array index out of bounds.
5490     __ Ldr(temp, MemOperand(object, array_length_offset.Int32Value()));
5491     if (!temp.IsLow()) {
5492       // Avoid using the 32-bit `cmp temp, #imm` in IT block by loading `size` into `temp2`.
5493       __ Mov(temp2, size_operand);
5494     }
5495     __ Subs(temp, temp, index);
5496     {
5497       // Use ExactAssemblyScope here because we are using IT.
5498       ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
5499                                   2 * k16BitT32InstructionSizeInBytes);
5500       __ it(hs);
5501       if (temp.IsLow()) {
5502         __ cmp(hs, temp, size_operand);
5503       } else {
5504         __ cmp(hs, temp, temp2);
5505       }
5506     }
5507     __ B(lo, GetEntryLabel());
5508 
5509     // Construct the target.
5510     __ Add(target.offset, index, data_offset.Int32Value());  // Note: `temp` cannot be used below.
5511 
5512     // Alignment check. For unaligned access, go to the runtime.
5513     DCHECK(IsPowerOfTwo(size));
5514     __ Tst(target.offset, dchecked_integral_cast<int32_t>(size - 1u));
5515     __ B(ne, GetEntryLabel());
5516 
5517     // Byte order check. For native byte order return to the main path.
5518     if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
5519       HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
5520       if (IsZeroBitPattern(arg)) {
5521         // There is no reason to differentiate between native byte order and byte-swap
5522         // for setting a zero bit pattern. Just return to the main path.
5523         __ B(GetNativeByteOrderLabel());
5524         return;
5525       }
5526     }
5527     __ Ldr(temp2, MemOperand(varhandle, native_byte_order_offset.Int32Value()));
5528     __ Cmp(temp2, 0);
5529     __ B(ne, GetNativeByteOrderLabel());
5530   }
5531 
5532   switch (access_mode_template) {
5533     case mirror::VarHandle::AccessModeTemplate::kGet:
5534       GenerateVarHandleGet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5535       break;
5536     case mirror::VarHandle::AccessModeTemplate::kSet:
5537       GenerateVarHandleSet(invoke, codegen, order_, atomic_, /*byte_swap=*/ true);
5538       break;
5539     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5540     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5541       GenerateVarHandleCompareAndSetOrExchange(
5542           invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
5543       break;
5544     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5545       GenerateVarHandleGetAndUpdate(
5546           invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
5547       break;
5548   }
5549   __ B(GetExitLabel());
5550 }
5551 
5552 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
5553 UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
5554 #undef MARK_UNIMPLEMENTED
5555 
5556 UNREACHABLE_INTRINSICS(ARMVIXL)
5557 
5558 #undef __
5559 
5560 }  // namespace arm
5561 }  // namespace art
5562