1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86_64.h"
18 
19 #include <limits>
20 
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "arch/x86_64/registers_x86_64.h"
23 #include "art_method.h"
24 #include "base/bit_utils.h"
25 #include "code_generator_x86_64.h"
26 #include "dex/modifiers.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "heap_poisoning.h"
30 #include "intrinsic_objects.h"
31 #include "intrinsics.h"
32 #include "intrinsics_utils.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/method_handle_impl.h"
36 #include "mirror/object_array-inl.h"
37 #include "mirror/reference.h"
38 #include "mirror/string.h"
39 #include "optimizing/code_generator.h"
40 #include "optimizing/data_type.h"
41 #include "optimizing/locations.h"
42 #include "scoped_thread_state_change-inl.h"
43 #include "thread-current-inl.h"
44 #include "utils/x86_64/assembler_x86_64.h"
45 #include "utils/x86_64/constants_x86_64.h"
46 #include "well_known_classes.h"
47 
48 namespace art HIDDEN {
49 
50 namespace x86_64 {
51 
52 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
53   : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
54 }
55 
56 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
57   return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
58 }
59 
60 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
61   return codegen_->GetGraph()->GetAllocator();
62 }
63 
64 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
65   Dispatch(invoke);
66   LocationSummary* res = invoke->GetLocations();
67   if (res == nullptr) {
68     return false;
69   }
70   return res->Intrinsified();
71 }
72 
73 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
74 
75 #define __ assembler->
76 
77 static void GenArrayAddress(X86_64Assembler* assembler,
78                             CpuRegister dest,
79                             CpuRegister base,
80                             Location pos,
81                             DataType::Type type,
82                             uint32_t data_offset) {
83   // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
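  // In both branches below the effective address is base + data_offset + pos * DataType::Size(type);
  // a constant `pos` is folded into the displacement, a register `pos` goes through the scale factor.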
84   if (pos.IsConstant()) {
85     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
86     __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
87   } else {
88     const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
89     __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
90   }
91 }
92 
93 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
94 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
95  public:
96   explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
97       : SlowPathCode(instruction) {
98   }
99 
100   void EmitNativeCode(CodeGenerator* codegen) override {
101     DCHECK(codegen->EmitBakerReadBarrier());
102     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
103     X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
104     LocationSummary* locations = instruction_->GetLocations();
105     DCHECK(locations->CanCall());
106     DCHECK(instruction_->IsInvokeStaticOrDirect())
107         << "Unexpected instruction in read barrier arraycopy slow path: "
108         << instruction_->DebugName();
109     DCHECK(instruction_->GetLocations()->Intrinsified());
110     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
111     Location length = locations->InAt(4);
112 
113     const DataType::Type type = DataType::Type::kReference;
114     const int32_t element_size = DataType::Size(type);
115 
116     CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
117     CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
118     CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
119 
120     __ Bind(GetEntryLabel());
121     // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
122     GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
123 
124     NearLabel loop;
125     __ Bind(&loop);
126     __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
127     __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
128     // TODO: Inline the mark bit check before calling the runtime?
129     // TMP = ReadBarrier::Mark(TMP);
130     // No need to save live registers; it's taken care of by the
131     // entrypoint. Also, there is no need to update the stack mask,
132     // as this runtime call will not trigger a garbage collection.
133     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
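    // The mark entrypoint is selected per register: it expects the unmarked reference in TMP and
    // returns the marked reference in the same register, so no argument moves are needed here.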
134     // This runtime call does not require a stack map.
135     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
136     __ MaybePoisonHeapReference(CpuRegister(TMP));
137     __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
138     __ addl(src_curr_addr, Immediate(element_size));
139     __ addl(dst_curr_addr, Immediate(element_size));
140     __ cmpl(src_curr_addr, src_stop_addr);
141     __ j(kNotEqual, &loop);
142     __ jmp(GetExitLabel());
143   }
144 
145   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
146 
147  private:
148   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
149 };
150 
151 // The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
152 // need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
153 // expects the MethodHandle object in RDI (in place of the actual ArtMethod).
154 class InvokePolymorphicSlowPathX86_64 : public SlowPathCode {
155  public:
156   InvokePolymorphicSlowPathX86_64(HInstruction* instruction, CpuRegister method_handle)
157       : SlowPathCode(instruction), method_handle_(method_handle) {
158     DCHECK(instruction->IsInvokePolymorphic());
159   }
160 
161   void EmitNativeCode(CodeGenerator* codegen) override {
162     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
163     X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
164     __ Bind(GetEntryLabel());
165     SaveLiveRegisters(codegen, instruction_->GetLocations());
166 
167     // Passing `MethodHandle` object as hidden argument.
168     __ movl(CpuRegister(RDI), method_handle_);
169     x86_64_codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
170                                   instruction_);
171 
172     RestoreLiveRegisters(codegen, instruction_->GetLocations());
173     __ jmp(GetExitLabel());
174   }
175 
176   const char* GetDescription() const override { return "InvokePolymorphicSlowPathX86_64"; }
177 
178  private:
179   const CpuRegister method_handle_;
180   DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathX86_64);
181 };
182 
183 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
184   LocationSummary* locations =
185       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
186   locations->SetInAt(0, Location::RequiresFpuRegister());
187   locations->SetOut(Location::RequiresRegister());
188 }
189 
190 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
191   LocationSummary* locations =
192       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
193   locations->SetInAt(0, Location::RequiresRegister());
194   locations->SetOut(Location::RequiresFpuRegister());
195 }
196 
197 static void MoveFPToInt(
198     CpuRegister dst, XmmRegister src, bool is64bit, X86_64Assembler* assembler) {
199   if (is64bit) {
200     __ movq(dst, src);
201   } else {
202     __ movd(dst, src);
203   }
204 }
205 
206 static void MoveIntToFP(
207     XmmRegister dst, CpuRegister src, bool is64bit, X86_64Assembler* assembler) {
208   if (is64bit) {
209     __ movq(dst, src);
210   } else {
211     __ movd(dst, src);
212   }
213 }
214 
215 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
216   XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
217   CpuRegister output = locations->Out().AsRegister<CpuRegister>();
218   MoveFPToInt(output, input, is64bit, assembler);
219 }
220 
221 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
222   CpuRegister input = locations->InAt(0).AsRegister<CpuRegister>();
223   XmmRegister output = locations->Out().AsFpuRegister<XmmRegister>();
224   MoveIntToFP(output, input, is64bit, assembler);
225 }
226 
227 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
228   CreateFPToIntLocations(allocator_, invoke);
229 }
230 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
231   CreateIntToFPLocations(allocator_, invoke);
232 }
233 
234 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
235   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
236 }
237 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
238   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
239 }
240 
241 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
242   CreateFPToIntLocations(allocator_, invoke);
243 }
244 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
245   CreateIntToFPLocations(allocator_, invoke);
246 }
247 
248 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
249   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
250 }
251 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
252   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
253 }
254 
255 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
256   LocationSummary* locations =
257       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
258   locations->SetInAt(0, Location::RequiresRegister());
259   locations->SetOut(Location::SameAsFirstInput());
260 }
261 
262 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
263   CreateIntToIntLocations(allocator_, invoke);
264 }
265 
266 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
267   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
268 }
269 
270 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
271   CreateIntToIntLocations(allocator_, invoke);
272 }
273 
274 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
275   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
276 }
277 
278 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
279   CreateIntToIntLocations(allocator_, invoke);
280 }
281 
282 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
283   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
284 }
285 
286 static void GenIsInfinite(LocationSummary* locations,
287                           bool is64bit,
288                           CodeGeneratorX86_64* codegen) {
289   X86_64Assembler* assembler = codegen->GetAssembler();
290 
291   XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
292   CpuRegister output = locations->Out().AsRegister<CpuRegister>();
293 
294   NearLabel done1, done2;
295 
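  // COMISS/COMISD set ZF when the operands compare equal and PF when either operand is NaN (an
  // unordered compare also sets ZF). So each kNotEqual branch below falls through only when the
  // input equals the tested infinity or is NaN, and the following kParityEven branch filters out
  // the NaN case before `output` is set to 1.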
296   if (is64bit) {
297     double kPositiveInfinity = std::numeric_limits<double>::infinity();
298     double kNegativeInfinity = -1 * kPositiveInfinity;
299 
300     __ xorq(output, output);
301     __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
302     __ j(kNotEqual, &done1);
303     __ j(kParityEven, &done2);
304     __ movq(output, Immediate(1));
305     __ jmp(&done2);
306     __ Bind(&done1);
307     __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
308     __ j(kNotEqual, &done2);
309     __ j(kParityEven, &done2);
310     __ movq(output, Immediate(1));
311     __ Bind(&done2);
312   } else {
313     float kPositiveInfinity = std::numeric_limits<float>::infinity();
314     float kNegativeInfinity = -1 * kPositiveInfinity;
315 
316     __ xorl(output, output);
317     __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
318     __ j(kNotEqual, &done1);
319     __ j(kParityEven, &done2);
320     __ movl(output, Immediate(1));
321     __ jmp(&done2);
322     __ Bind(&done1);
323     __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
324     __ j(kNotEqual, &done2);
325     __ j(kParityEven, &done2);
326     __ movl(output, Immediate(1));
327     __ Bind(&done2);
328   }
329 }
330 
331 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
332   CreateFPToIntLocations(allocator_, invoke);
333 }
334 
335 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
336   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, codegen_);
337 }
338 
339 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
340   CreateFPToIntLocations(allocator_, invoke);
341 }
342 
343 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
344   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, codegen_);
345 }
346 
347 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
348   LocationSummary* locations =
349       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
350   locations->SetInAt(0, Location::RequiresFpuRegister());
351   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
352 }
353 
354 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
355   CreateFPToFPLocations(allocator_, invoke);
356 }
357 
358 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
359   LocationSummary* locations = invoke->GetLocations();
360   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
361   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
362 
363   GetAssembler()->sqrtsd(out, in);
364 }
365 
366 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
367                                        HInvoke* invoke,
368                                        CodeGeneratorX86_64* codegen) {
369   // Do we have instruction support?
370   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
371     return;
372   }
373 
374   CreateFPToFPLocations(allocator, invoke);
375 }
376 
377 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
378   LocationSummary* locations = invoke->GetLocations();
379   DCHECK(!locations->WillCall());
380   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
381   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
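  // With bit 2 of the immediate clear, ROUNDSD uses bits 1:0 as the rounding mode: 0 rounds to
  // nearest (even), 1 rounds toward -inf (floor) and 2 rounds toward +inf (ceil), which is why
  // the callers below pass 0 for rint, 1 for floor and 2 for ceil.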
382   __ roundsd(out, in, Immediate(round_mode));
383 }
384 
385 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
386   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
387 }
388 
389 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
390   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
391 }
392 
393 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
394   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
395 }
396 
397 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
398   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
399 }
400 
401 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
402   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
403 }
404 
405 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
406   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
407 }
408 
409 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
410                                         HInvoke* invoke,
411                                         CodeGeneratorX86_64* codegen) {
412   // Do we have instruction support?
413   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
414     return;
415   }
416 
417   LocationSummary* locations =
418       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
419   locations->SetInAt(0, Location::RequiresFpuRegister());
420   locations->SetOut(Location::RequiresRegister());
421   locations->AddTemp(Location::RequiresFpuRegister());
422   locations->AddTemp(Location::RequiresFpuRegister());
423 }
424 
425 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
426   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
427 }
428 
429 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
430   LocationSummary* locations = invoke->GetLocations();
431   DCHECK(!locations->WillCall());
432 
433   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
434   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
435   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
436   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
437   NearLabel skip_incr, done;
438   X86_64Assembler* assembler = GetAssembler();
439 
440   // Since no direct x86 rounding instruction matches the required semantics,
441   // this intrinsic is implemented as follows:
442   //  result = floor(in);
443   //  if (in - result >= 0.5f)
444   //    result = result + 1.0f;
445   __ movss(t2, in);
446   __ roundss(t1, in, Immediate(1));
447   __ subss(t2, t1);
448   __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
449   __ j(kBelow, &skip_incr);
450   __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
451   __ Bind(&skip_incr);
452 
453   // Final conversion to an integer. Unfortunately this also does not have a
454   // direct x86 instruction, since NaN should map to 0 and large positive
455   // values need to be clipped to the extreme value.
456   codegen_->Load32BitValue(out, kPrimIntMax);
457   __ cvtsi2ss(t2, out);
458   __ comiss(t1, t2);
459   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
460   __ movl(out, Immediate(0));  // does not change flags
461   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
462   __ cvttss2si(out, t1);
463   __ Bind(&done);
464 }
465 
466 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
467   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
468 }
469 
470 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
471   LocationSummary* locations = invoke->GetLocations();
472   DCHECK(!locations->WillCall());
473 
474   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
475   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
476   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
477   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
478   NearLabel skip_incr, done;
479   X86_64Assembler* assembler = GetAssembler();
480 
481   // Since no direct x86 rounding instruction matches the required semantics,
482   // this intrinsic is implemented as follows:
483   //  result = floor(in);
484   //  if (in - result >= 0.5)
485   //    result = result + 1.0;
486   __ movsd(t2, in);
487   __ roundsd(t1, in, Immediate(1));
488   __ subsd(t2, t1);
489   __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
490   __ j(kBelow, &skip_incr);
491   __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
492   __ Bind(&skip_incr);
493 
494   // Final conversion to an integer. Unfortunately this also does not have a
495   // direct x86 instruction, since NaN should map to 0 and large positive
496   // values need to be clipped to the extreme value.
497   codegen_->Load64BitValue(out, kPrimLongMax);
498   __ cvtsi2sd(t2, out, /* is64bit= */ true);
499   __ comisd(t1, t2);
500   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
501   __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
502   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
503   __ cvttsd2si(out, t1, /* is64bit= */ true);
504   __ Bind(&done);
505 }
506 
507 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
508   LocationSummary* locations =
509       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
510   InvokeRuntimeCallingConvention calling_convention;
511   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
512   locations->SetOut(Location::FpuRegisterLocation(XMM0));
513 
514   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
515 }
516 
517 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
518                           QuickEntrypointEnum entry) {
519   LocationSummary* locations = invoke->GetLocations();
520   DCHECK(locations->WillCall());
521   DCHECK(invoke->IsInvokeStaticOrDirect());
522 
523   codegen->InvokeRuntime(entry, invoke);
524 }
525 
526 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
527   CreateFPToFPCallLocations(allocator_, invoke);
528 }
529 
530 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
531   GenFPToFPCall(invoke, codegen_, kQuickCos);
532 }
533 
534 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
535   CreateFPToFPCallLocations(allocator_, invoke);
536 }
537 
538 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
539   GenFPToFPCall(invoke, codegen_, kQuickSin);
540 }
541 
542 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
543   CreateFPToFPCallLocations(allocator_, invoke);
544 }
545 
546 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
547   GenFPToFPCall(invoke, codegen_, kQuickAcos);
548 }
549 
550 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
551   CreateFPToFPCallLocations(allocator_, invoke);
552 }
553 
554 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
555   GenFPToFPCall(invoke, codegen_, kQuickAsin);
556 }
557 
558 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
559   CreateFPToFPCallLocations(allocator_, invoke);
560 }
561 
562 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
563   GenFPToFPCall(invoke, codegen_, kQuickAtan);
564 }
565 
566 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
567   CreateFPToFPCallLocations(allocator_, invoke);
568 }
569 
570 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
571   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
572 }
573 
574 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
575   CreateFPToFPCallLocations(allocator_, invoke);
576 }
577 
578 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
579   GenFPToFPCall(invoke, codegen_, kQuickCosh);
580 }
581 
582 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
583   CreateFPToFPCallLocations(allocator_, invoke);
584 }
585 
586 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
587   GenFPToFPCall(invoke, codegen_, kQuickExp);
588 }
589 
590 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
591   CreateFPToFPCallLocations(allocator_, invoke);
592 }
593 
594 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
595   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
596 }
597 
598 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
599   CreateFPToFPCallLocations(allocator_, invoke);
600 }
601 
602 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
603   GenFPToFPCall(invoke, codegen_, kQuickLog);
604 }
605 
606 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
607   CreateFPToFPCallLocations(allocator_, invoke);
608 }
609 
610 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
611   GenFPToFPCall(invoke, codegen_, kQuickLog10);
612 }
613 
614 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
615   CreateFPToFPCallLocations(allocator_, invoke);
616 }
617 
618 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
619   GenFPToFPCall(invoke, codegen_, kQuickSinh);
620 }
621 
622 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
623   CreateFPToFPCallLocations(allocator_, invoke);
624 }
625 
626 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
627   GenFPToFPCall(invoke, codegen_, kQuickTan);
628 }
629 
630 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
631   CreateFPToFPCallLocations(allocator_, invoke);
632 }
633 
634 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
635   GenFPToFPCall(invoke, codegen_, kQuickTanh);
636 }
637 
638 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
639   LocationSummary* locations =
640       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
641   InvokeRuntimeCallingConvention calling_convention;
642   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
643   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
644   locations->SetOut(Location::FpuRegisterLocation(XMM0));
645 
646   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
647 }
648 
649 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
650   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
651   LocationSummary* locations =
652       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
653   InvokeRuntimeCallingConvention calling_convention;
654   locations->SetInAt(0, Location::RequiresFpuRegister());
655   locations->SetInAt(1, Location::RequiresFpuRegister());
656   locations->SetInAt(2, Location::RequiresFpuRegister());
657   locations->SetOut(Location::SameAsFirstInput());
658 }
659 
660 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
661   CreateFPFPToFPCallLocations(allocator_, invoke);
662 }
663 
664 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
665   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
666 }
667 
668 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
669   CreateFPFPToFPCallLocations(allocator_, invoke);
670 }
671 
672 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
673   GenFPToFPCall(invoke, codegen_, kQuickPow);
674 }
675 
676 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
677   CreateFPFPToFPCallLocations(allocator_, invoke);
678 }
679 
680 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
681   GenFPToFPCall(invoke, codegen_, kQuickHypot);
682 }
683 
684 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
685   CreateFPFPToFPCallLocations(allocator_, invoke);
686 }
687 
688 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
689   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
690 }
691 
692 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
693   // Check to see if we have known failures that will cause us to have to bail out
694   // to the runtime, and just generate the runtime call directly.
695   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
696   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
697 
698   // The positions must be non-negative.
699   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
700       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
701     // We will have to fail anyways.
702     return;
703   }
704 
705   // The length must be >= 0.
706   HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
707   if (length != nullptr) {
708     int32_t len = length->GetValue();
709     if (len < 0) {
710       // Just call as normal.
711       return;
712     }
713   }
714   LocationSummary* locations =
715       new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
716       (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
717   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
718   locations->SetInAt(0, Location::RequiresRegister());
719   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
720   locations->SetInAt(2, Location::RequiresRegister());
721   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
722   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
723 
724   // And we need some temporaries.  We will use REP MOVS{B,W,L}, so we need fixed registers.
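  // REP MOVS implicitly takes its source pointer in RSI, its destination pointer in RDI and its
  // element count in RCX, which is why these exact registers are reserved as temporaries.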
725   locations->AddTemp(Location::RegisterLocation(RSI));
726   locations->AddTemp(Location::RegisterLocation(RDI));
727   locations->AddTemp(Location::RegisterLocation(RCX));
728 }
729 
730 template <typename LhsType>
731 static void EmitCmplJLess(X86_64Assembler* assembler,
732                           LhsType lhs,
733                           Location rhs,
734                           Label* label) {
735   static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
736   if (rhs.IsConstant()) {
737     int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
738     __ cmpl(lhs, Immediate(rhs_constant));
739   } else {
740     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
741   }
742   __ j(kLess, label);
743 }
744 
745 static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
746                                          CpuRegister array,
747                                          Location pos,
748                                          Location length,
749                                          SlowPathCode* slow_path,
750                                          CpuRegister temp,
751                                          bool length_is_array_length,
752                                          bool position_sign_checked) {
753   // Where is the length in the Array?
754   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
755 
756   if (pos.IsConstant()) {
757     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
758     if (pos_const == 0) {
759       if (!length_is_array_length) {
760         // Check that length(array) >= length.
761         EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
762       }
763     } else {
764       // Calculate length(array) - pos.
765       // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
766       // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
767       __ movl(temp, Address(array, length_offset));
768       __ subl(temp, Immediate(pos_const));
769 
770       // Check that (length(array) - pos) >= length.
771       EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
772     }
773   } else if (length_is_array_length) {
774     // The only way the copy can succeed is if pos is zero.
775     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
776     __ testl(pos_reg, pos_reg);
777     __ j(kNotEqual, slow_path->GetEntryLabel());
778   } else {
779     // Check that pos >= 0.
780     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
781     if (!position_sign_checked) {
782       __ testl(pos_reg, pos_reg);
783       __ j(kLess, slow_path->GetEntryLabel());
784     }
785 
786     // Calculate length(array) - pos.
787     // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
788     // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
789     __ movl(temp, Address(array, length_offset));
790     __ subl(temp, pos_reg);
791 
792     // Check that (length(array) - pos) >= length.
793     EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
794   }
795 }
796 
797 static void SystemArrayCopyPrimitive(HInvoke* invoke,
798                                      X86_64Assembler* assembler,
799                                      CodeGeneratorX86_64* codegen,
800                                      DataType::Type type) {
801   LocationSummary* locations = invoke->GetLocations();
802   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
803   Location src_pos = locations->InAt(1);
804   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
805   Location dest_pos = locations->InAt(3);
806   Location length = locations->InAt(4);
807 
808   // Temporaries that we need for MOVSB/W/L.
809   CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
810   DCHECK_EQ(src_base.AsRegister(), RSI);
811   CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
812   DCHECK_EQ(dest_base.AsRegister(), RDI);
813   CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
814   DCHECK_EQ(count.AsRegister(), RCX);
815 
816   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
817   codegen->AddSlowPath(slow_path);
818 
819   // Bail out if the source and destination are the same.
820   __ cmpl(src, dest);
821   __ j(kEqual, slow_path->GetEntryLabel());
822 
823   // Bail out if the source is null.
824   __ testl(src, src);
825   __ j(kEqual, slow_path->GetEntryLabel());
826 
827   // Bail out if the destination is null.
828   __ testl(dest, dest);
829   __ j(kEqual, slow_path->GetEntryLabel());
830 
831   // If the length is negative, bail out.
832   // We have already checked in the LocationsBuilder for the constant case.
833   if (!length.IsConstant()) {
834     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
835     __ j(kLess, slow_path->GetEntryLabel());
836   }
837 
838   // Validity checks: source. Use src_base as a temporary register.
839   CheckSystemArrayCopyPosition(assembler,
840                                src,
841                                src_pos,
842                                length,
843                                slow_path,
844                                src_base,
845                                /*length_is_array_length=*/ false,
846                                /*position_sign_checked=*/ false);
847 
848   // Validity checks: dest. Use src_base as a temporary register.
849   CheckSystemArrayCopyPosition(assembler,
850                                dest,
851                                dest_pos,
852                                length,
853                                slow_path,
854                                src_base,
855                                /*length_is_array_length=*/ false,
856                                /*position_sign_checked=*/ false);
857 
858   // We need the count in RCX.
859   if (length.IsConstant()) {
860     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
861   } else {
862     __ movl(count, length.AsRegister<CpuRegister>());
863   }
864 
865   // Okay, everything checks out.  Finally time to do the copy.
866   // The element size of the copied type determines the address scaling below.
867   const size_t data_size = DataType::Size(type);
868   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
869 
870   GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
871   GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
872 
873   // Do the move.
874   switch (type) {
875     case DataType::Type::kInt8:
876        __ rep_movsb();
877        break;
878     case DataType::Type::kUint16:
879        __ rep_movsw();
880        break;
881     case DataType::Type::kInt32:
882        __ rep_movsl();
883        break;
884     default:
885        LOG(FATAL) << "Unexpected data type for intrinsic";
886   }
887   __ Bind(slow_path->GetExitLabel());
888 }
889 
890 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
891   CreateSystemArrayCopyLocations(invoke);
892 }
893 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
894   X86_64Assembler* assembler = GetAssembler();
895   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
896 }
897 
898 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
899   X86_64Assembler* assembler = GetAssembler();
900   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
901 }
902 
903 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
904   CreateSystemArrayCopyLocations(invoke);
905 }
906 
907 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
908   X86_64Assembler* assembler = GetAssembler();
909   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
910 }
911 
912 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
913   CreateSystemArrayCopyLocations(invoke);
914 }
915 
916 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
917   // The only read barrier implementation supporting the
918   // SystemArrayCopy intrinsic is the Baker-style read barriers.
919   if (codegen_->EmitNonBakerReadBarrier()) {
920     return;
921   }
922 
923   constexpr int32_t kLengthThreshold = -1;  // No cut-off - handle large arrays in intrinsic code.
924   constexpr size_t kInitialNumTemps = 0u;  // We shall allocate temps explicitly.
925   LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
926       invoke, kLengthThreshold, kInitialNumTemps);
927   if (locations != nullptr) {
928     // Add temporaries.  We will use REP MOVSL, so we need fixed registers.
929     DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
930     locations->AddTemp(Location::RegisterLocation(RSI));
931     locations->AddTemp(Location::RegisterLocation(RDI));
932     locations->AddTemp(Location::RegisterLocation(RCX));
933   }
934 }
935 
936 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
937   // The only read barrier implementation supporting the
938   // SystemArrayCopy intrinsic is the Baker-style read barriers.
939   DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
940 
941   X86_64Assembler* assembler = GetAssembler();
942   LocationSummary* locations = invoke->GetLocations();
943 
944   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
945   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
946   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
947   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
948   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
949 
950   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
951   Location src_pos = locations->InAt(1);
952   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
953   Location dest_pos = locations->InAt(3);
954   Location length = locations->InAt(4);
955   Location temp1_loc = locations->GetTemp(0);
956   CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
957   Location temp2_loc = locations->GetTemp(1);
958   CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
959   Location temp3_loc = locations->GetTemp(2);
960   CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
961 
962   SlowPathCode* intrinsic_slow_path =
963       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
964   codegen_->AddSlowPath(intrinsic_slow_path);
965 
966   NearLabel conditions_on_positions_validated;
967   SystemArrayCopyOptimizations optimizations(invoke);
968 
969   // If source and destination are the same, we go to slow path if we need to do forward copying.
970   // We do not need to do this check if the source and destination positions are the same.
971   if (!optimizations.GetSourcePositionIsDestinationPosition()) {
972     if (src_pos.IsConstant()) {
973       int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
974       if (dest_pos.IsConstant()) {
975         int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
976         if (optimizations.GetDestinationIsSource()) {
977           // Checked when building locations.
978           DCHECK_GE(src_pos_constant, dest_pos_constant);
979         } else if (src_pos_constant < dest_pos_constant) {
980           __ cmpl(src, dest);
981           __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
982         }
983       } else {
984         if (!optimizations.GetDestinationIsSource()) {
985           __ cmpl(src, dest);
986           __ j(kNotEqual, &conditions_on_positions_validated);
987         }
988         __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
989         __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
990       }
991     } else {
992       if (!optimizations.GetDestinationIsSource()) {
993         __ cmpl(src, dest);
994         __ j(kNotEqual, &conditions_on_positions_validated);
995       }
996       CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
997       EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
998     }
999   }
1000 
1001   __ Bind(&conditions_on_positions_validated);
1002 
1003   if (!optimizations.GetSourceIsNotNull()) {
1004     // Bail out if the source is null.
1005     __ testl(src, src);
1006     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1007   }
1008 
1009   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1010     // Bail out if the destination is null.
1011     __ testl(dest, dest);
1012     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1013   }
1014 
1015   // If the length is negative, bail out.
1016   // We have already checked in the LocationsBuilder for the constant case.
1017   if (!length.IsConstant() &&
1018       !optimizations.GetCountIsSourceLength() &&
1019       !optimizations.GetCountIsDestinationLength()) {
1020     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1021     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1022   }
1023 
1024   // Validity checks: source.
1025   CheckSystemArrayCopyPosition(assembler,
1026                                src,
1027                                src_pos,
1028                                length,
1029                                intrinsic_slow_path,
1030                                temp1,
1031                                optimizations.GetCountIsSourceLength(),
1032                                /*position_sign_checked=*/ false);
1033 
1034   // Validity checks: dest.
1035   bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1036   CheckSystemArrayCopyPosition(assembler,
1037                                dest,
1038                                dest_pos,
1039                                length,
1040                                intrinsic_slow_path,
1041                                temp1,
1042                                optimizations.GetCountIsDestinationLength(),
1043                                dest_position_sign_checked);
1044 
1045   auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
1046     // No read barrier is needed for reading a chain of constant references for comparing
1047     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1048     // /* HeapReference<Class> */ temp = klass->component_type_
1049     __ movl(temp, Address(klass, component_offset));
1050     __ MaybeUnpoisonHeapReference(temp);
1051     // Check that the component type is not null.
1052     __ testl(temp, temp);
1053     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1054     // Check that the component type is not a primitive.
1055     __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
1056     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1057   };
1058 
1059   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1060     // Check whether all elements of the source array are assignable to the component
1061     // type of the destination array. We do two checks: the classes are the same,
1062     // or the destination is Object[]. If none of these checks succeed, we go to the
1063     // slow path.
1064 
1065     if (codegen_->EmitBakerReadBarrier()) {
1066       // /* HeapReference<Class> */ temp1 = dest->klass_
1067       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1068           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1069       // Register `temp1` is not trashed by the read barrier emitted
1070       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1071       // method produces a call to a ReadBarrierMarkRegX entry point,
1072       // which saves all potentially live registers, including
1073       // temporaries such as `temp1`.
1074       // /* HeapReference<Class> */ temp2 = src->klass_
1075       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1076           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1077       // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
1078       // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
1079     } else {
1080       // /* HeapReference<Class> */ temp1 = dest->klass_
1081       __ movl(temp1, Address(dest, class_offset));
1082       __ MaybeUnpoisonHeapReference(temp1);
1083       // /* HeapReference<Class> */ temp2 = src->klass_
1084       __ movl(temp2, Address(src, class_offset));
1085       __ MaybeUnpoisonHeapReference(temp2);
1086     }
1087 
1088     __ cmpl(temp1, temp2);
1089     if (optimizations.GetDestinationIsTypedObjectArray()) {
1090       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1091       NearLabel do_copy;
1092       // For class match, we can skip the source type check regardless of the optimization flag.
1093       __ j(kEqual, &do_copy);
1094       // No read barrier is needed for reading a chain of constant references
1095       // for comparing with null, see `ReadBarrierOption`.
1096       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1097       __ movl(temp1, Address(temp1, component_offset));
1098       __ MaybeUnpoisonHeapReference(temp1);
1099       // No need to unpoison the following heap reference load, as
1100       // we're comparing against null.
1101       __ cmpl(Address(temp1, super_offset), Immediate(0));
1102       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1103       // Bail out if the source is not a non primitive array.
1104       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1105         check_non_primitive_array_class(temp2, CpuRegister(TMP));
1106       }
1107       __ Bind(&do_copy);
1108     } else {
1109       DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1110       // For class match, we can skip the array type check completely if at least one of source
1111       // and destination is known to be a non primitive array, otherwise one check is enough.
1112       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1113       if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1114           !optimizations.GetSourceIsNonPrimitiveArray()) {
1115         check_non_primitive_array_class(temp2, CpuRegister(TMP));
1116       }
1117     }
1118   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1119     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1120     // Bail out if the source is not a non primitive array.
1121     // No read barrier is needed for reading a chain of constant references for comparing
1122     // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1123     // /* HeapReference<Class> */ temp1 = src->klass_
1124     __ movl(temp1, Address(src, class_offset));
1125     __ MaybeUnpoisonHeapReference(temp1);
1126     check_non_primitive_array_class(temp1, CpuRegister(TMP));
1127   }
1128 
1129   if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1130     // Constant zero length: no need to emit the loop code at all.
1131   } else {
1132     const DataType::Type type = DataType::Type::kReference;
1133     const int32_t element_size = DataType::Size(type);
1134     const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1135 
1136     // Don't enter copy loop if `length == 0`.
1137     NearLabel skip_copy_and_write_barrier;
1138     if (!length.IsConstant()) {
1139       __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1140       __ j(kEqual, &skip_copy_and_write_barrier);
1141     }
1142 
1143     // Compute base source address, base destination address, and end
1144     // source address in `temp1`, `temp2` and `temp3` respectively.
1145     GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
1146     GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
1147 
1148     SlowPathCode* read_barrier_slow_path = nullptr;
1149     if (codegen_->EmitBakerReadBarrier()) {
1150       // SystemArrayCopy implementation for Baker read barriers (see
1151       // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1152       //
1153       //   if (src_ptr != end_ptr) {
1154       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1155       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1156       //     bool is_gray = (rb_state == ReadBarrier::GrayState());
1157       //     if (is_gray) {
1158       //       // Slow-path copy.
1159       //       do {
1160       //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1161       //       } while (src_ptr != end_ptr)
1162       //     } else {
1163       //       // Fast-path copy.
1164       //       do {
1165       //         *dest_ptr++ = *src_ptr++;
1166       //       } while (src_ptr != end_ptr)
1167       //     }
1168       //   }
1169 
1170       // Given the numeric representation, it's enough to check the low bit of the rb_state.
1171       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1172       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1173       constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1174       constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1175       constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
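      // Illustrative example (assuming the current LockWord layout where kReadBarrierStateShift == 28):
      // gray_byte_position == 3 and gray_bit_position == 4, so test_value == 0x10 and the testb
      // below probes bit 4 of the most significant byte of the 32-bit monitor_ field.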
1176 
1177       // if (rb_state == ReadBarrier::GrayState())
1178       //   goto slow_path;
1179       // At this point, just do the "if" and make sure that flags are preserved until the branch.
1180       __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1181 
1182       // Load fence to prevent load-load reordering.
1183       // Note that this is a no-op, thanks to the x86-64 memory model.
1184       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
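      // (x86-64 does not reorder loads with other loads, so a compiler barrier is sufficient here;
      // a weakly ordered architecture would need a real load fence at this point.)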
1185 
1186       // Slow path used to copy array when `src` is gray.
1187       read_barrier_slow_path =
1188           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1189       codegen_->AddSlowPath(read_barrier_slow_path);
1190 
1191       // We have done the "if" of the gray bit check above; now branch based on the flags.
1192       __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1193     }
1194 
1195     if (length.IsConstant()) {
1196       __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1197     } else {
1198       __ movl(temp3, length.AsRegister<CpuRegister>());
1199     }
1200 
1201     // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
1202     DCHECK_EQ(temp1.AsRegister(), RSI);
1203     DCHECK_EQ(temp2.AsRegister(), RDI);
1204     DCHECK_EQ(temp3.AsRegister(), RCX);
1205     __ rep_movsl();
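    // A brief sketch of the semantics relied on here: `rep movsl` copies RCX (temp3) 32-bit
    // elements from [RSI] (temp1) to [RDI] (temp2), advancing both pointers, which is why the
    // three temporaries are pinned to RSI/RDI/RCX above.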
1206 
1207     if (read_barrier_slow_path != nullptr) {
1208       DCHECK(codegen_->EmitBakerReadBarrier());
1209       __ Bind(read_barrier_slow_path->GetExitLabel());
1210     }
1211 
1212     // We only need one card marking on the destination array.
1213     codegen_->MarkGCCard(temp1, temp2, dest);
1214 
1215     __ Bind(&skip_copy_and_write_barrier);
1216   }
1217 
1218   __ Bind(intrinsic_slow_path->GetExitLabel());
1219 }
1220 
VisitStringCompareTo(HInvoke * invoke)1221 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1222   LocationSummary* locations = new (allocator_) LocationSummary(
1223       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1224   InvokeRuntimeCallingConvention calling_convention;
1225   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1226   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1227   locations->SetOut(Location::RegisterLocation(RAX));
1228 }
1229 
VisitStringCompareTo(HInvoke * invoke)1230 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1231   X86_64Assembler* assembler = GetAssembler();
1232   LocationSummary* locations = invoke->GetLocations();
1233 
1234   // Note that the null check must have been done earlier.
1235   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1236 
1237   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1238   __ testl(argument, argument);
1239   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1240   codegen_->AddSlowPath(slow_path);
1241   __ j(kEqual, slow_path->GetEntryLabel());
1242 
1243   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, slow_path);
1244   __ Bind(slow_path->GetExitLabel());
1245 }
1246 
VisitStringEquals(HInvoke * invoke)1247 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1248   LocationSummary* locations =
1249       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1250   locations->SetInAt(0, Location::RequiresRegister());
1251   locations->SetInAt(1, Location::RequiresRegister());
1252 
1253   // Request temporary registers; RCX and RDI are needed for the repe_cmpsq instruction.
1254   locations->AddTemp(Location::RegisterLocation(RCX));
1255   locations->AddTemp(Location::RegisterLocation(RDI));
1256 
1257   // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1258   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1259 }
1260 
VisitStringEquals(HInvoke * invoke)1261 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1262   X86_64Assembler* assembler = GetAssembler();
1263   LocationSummary* locations = invoke->GetLocations();
1264 
1265   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1266   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1267   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1268   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1269   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1270 
1271   NearLabel end, return_true, return_false;
1272 
1273   // Get offsets of count, value, and class fields within a string object.
1274   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1275   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1276   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1277 
1278   // Note that the null check must have been done earlier.
1279   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1280 
1281   StringEqualsOptimizations optimizations(invoke);
1282   if (!optimizations.GetArgumentNotNull()) {
1283     // Check if input is null, return false if it is.
1284     __ testl(arg, arg);
1285     __ j(kEqual, &return_false);
1286   }
1287 
1288   if (!optimizations.GetArgumentIsString()) {
1289     // Instanceof check for the argument by comparing class fields.
1290     // All string objects must have the same type since String cannot be subclassed.
1291     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1292     // If the argument is a string object, its class field must be equal to receiver's class field.
1293     //
1294     // As the String class is expected to be non-movable, we can read the class
1295     // field from String.equals' arguments without read barriers.
1296     AssertNonMovableStringClass();
1297     // Also, because we use the loaded class references only to compare them, we
1298     // don't need to unpoison them.
1299     // /* HeapReference<Class> */ rcx = str->klass_
1300     __ movl(rcx, Address(str, class_offset));
1301     // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1302     __ cmpl(rcx, Address(arg, class_offset));
1303     __ j(kNotEqual, &return_false);
1304   }
1305 
1306   // Reference equality check, return true if same reference.
1307   __ cmpl(str, arg);
1308   __ j(kEqual, &return_true);
1309 
1310   // Load length and compression flag of receiver string.
1311   __ movl(rcx, Address(str, count_offset));
1312   // Check if lengths and compression flags are equal; return false if they're not.
1313   // Two identical strings will always have the same compression style since
1314   // the compression style is decided at allocation time.
1315   __ cmpl(rcx, Address(arg, count_offset));
1316   __ j(kNotEqual, &return_false);
1317   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1318   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1319                 "Expecting 0=compressed, 1=uncompressed");
1320   __ jrcxz(&return_true);
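  // `jrcxz` branches when RCX is zero, i.e. when both (equal-count) strings are empty.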
1321 
1322   if (mirror::kUseStringCompression) {
1323     NearLabel string_uncompressed;
1324     // Extract the length and distinguish between the both-compressed and both-uncompressed cases.
1325     // Strings with different compression styles were already rejected above.
1326     __ shrl(rcx, Immediate(1));
1327     __ j(kCarrySet, &string_uncompressed);
1328     // Divide string length by 2, rounding up, and continue as if uncompressed.
1329     // Add 1 before halving so that odd lengths round up.
1330     __ addl(rcx, Immediate(1));
1331     __ shrl(rcx, Immediate(1));
1332     __ Bind(&string_uncompressed);
1333   }
1334   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1335   __ leal(rsi, Address(str, value_offset));
1336   __ leal(rdi, Address(arg, value_offset));
1337 
1338   // Divide the string length by 4, rounding up to cover lengths not divisible by 4.
1339   __ addl(rcx, Immediate(3));
1340   __ shrl(rcx, Immediate(2));
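  // Worked example (illustrative): an uncompressed string of length 10 occupies 20 bytes and gets
  // (10 + 3) >> 2 = 3 quadword comparisons (24 bytes); a compressed string of length 13 was turned
  // into ceil(13 / 2) = 7 above and gets (7 + 3) >> 2 = 2 quadwords (16 bytes). Reading slightly
  // past the value is safe because of the zero padding asserted below.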
1341 
1342   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1343   // or 8 characters (compressed) at a time.
1344   DCHECK_ALIGNED(value_offset, 8);
1345   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1346 
1347   // Loop comparing 8 bytes (4 uncompressed or 8 compressed chars) at a time from the start of the strings.
1348   __ repe_cmpsq();
1349   // If strings are not equal, zero flag will be cleared.
1350   __ j(kNotEqual, &return_false);
1351 
1352   // Return true and exit the function.
1353   // If the loop did not branch to return_false, the strings are equal.
1354   __ Bind(&return_true);
1355   __ movl(rsi, Immediate(1));
1356   __ jmp(&end);
1357 
1358   // Return false and exit the function.
1359   __ Bind(&return_false);
1360   __ xorl(rsi, rsi);
1361   __ Bind(&end);
1362 }
1363 
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1364 static void CreateStringIndexOfLocations(HInvoke* invoke,
1365                                          ArenaAllocator* allocator,
1366                                          bool start_at_zero) {
1367   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1368                                                                LocationSummary::kCallOnSlowPath,
1369                                                                kIntrinsified);
1370   // The data needs to be in RDI for scasw, so request that the string is placed there anyway.
1371   locations->SetInAt(0, Location::RegisterLocation(RDI));
1372   // If we look for a constant char, we'll still have to copy it into RAX, so just request the
1373   // allocator to do that anyway. We can still do the constant check by inspecting the parameter
1374   // of the instruction explicitly.
1375   // Note: This works as we don't clobber RAX anywhere.
1376   locations->SetInAt(1, Location::RegisterLocation(RAX));
1377   if (!start_at_zero) {
1378     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1379   }
1380   // As we clobber RDI during execution anyway, also use it as the output.
1381   locations->SetOut(Location::SameAsFirstInput());
1382 
1383   // repne scasw uses RCX as the counter.
1384   locations->AddTemp(Location::RegisterLocation(RCX));
1385   // Need another temporary to be able to compute the result.
1386   locations->AddTemp(Location::RequiresRegister());
1387 }
1388 
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1389 static void GenerateStringIndexOf(HInvoke* invoke,
1390                                   X86_64Assembler* assembler,
1391                                   CodeGeneratorX86_64* codegen,
1392                                   bool start_at_zero) {
1393   LocationSummary* locations = invoke->GetLocations();
1394 
1395   // Note that the null check must have been done earlier.
1396   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1397 
1398   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1399   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1400   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1401   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1402   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1403 
1404   // Check our assumptions for registers.
1405   DCHECK_EQ(string_obj.AsRegister(), RDI);
1406   DCHECK_EQ(search_value.AsRegister(), RAX);
1407   DCHECK_EQ(counter.AsRegister(), RCX);
1408   DCHECK_EQ(out.AsRegister(), RDI);
1409 
1410   // Check for code points > 0xFFFF. Emit a slow-path check when we don't know statically, dispatch
1411   // directly to the slow path for a large constant, or omit the slow path for a small constant or a char.
1412   SlowPathCode* slow_path = nullptr;
1413   HInstruction* code_point = invoke->InputAt(1);
1414   if (code_point->IsIntConstant()) {
1415     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1416         std::numeric_limits<uint16_t>::max()) {
1417       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1418       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1419       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1420       codegen->AddSlowPath(slow_path);
1421       __ jmp(slow_path->GetEntryLabel());
1422       __ Bind(slow_path->GetExitLabel());
1423       return;
1424     }
1425   } else if (code_point->GetType() != DataType::Type::kUint16) {
1426     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1427     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1428     codegen->AddSlowPath(slow_path);
1429     __ j(kAbove, slow_path->GetEntryLabel());
1430   }
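  // Code points above 0xFFFF are supplementary characters encoded as surrogate pairs, which this
  // single-char scan cannot match; the slow path falls back to the original (non-intrinsic) call.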
1431 
1432   // From here down, we know that we are looking for a char that fits in
1433   // 16 bits (uncompressed) or 8 bits (compressed).
1434   // Location of reference to data array within the String object.
1435   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1436   // Location of count within the String object.
1437   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1438 
1439   // Load the count field of the string containing the length and compression flag.
1440   __ movl(string_length, Address(string_obj, count_offset));
1441 
1442   // Do a zero-length check. Even with string compression `count == 0` means empty.
1443   // TODO: Support jecxz.
1444   NearLabel not_found_label;
1445   __ testl(string_length, string_length);
1446   __ j(kEqual, &not_found_label);
1447 
1448   if (mirror::kUseStringCompression) {
1449     // Use TMP to keep string_length_flagged.
1450     __ movl(CpuRegister(TMP), string_length);
1451     // Shift out the low bit used as the compression flag to get the char count.
1452     __ shrl(string_length, Immediate(1));
1453   }
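  // When string compression is enabled, TMP now holds the raw count field (low bit == compression
  // flag) while string_length holds the plain character count.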
1454 
1455   if (start_at_zero) {
1456     // Number of chars to scan is the same as the string length.
1457     __ movl(counter, string_length);
1458     // Move to the start of the string.
1459     __ addq(string_obj, Immediate(value_offset));
1460   } else {
1461     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1462 
1463     // Do a start_index check.
1464     __ cmpl(start_index, string_length);
1465     __ j(kGreaterEqual, &not_found_label);
1466 
1467     // Ensure we have a start index >= 0.
1468     __ xorl(counter, counter);
1469     __ cmpl(start_index, Immediate(0));
1470     __ cmov(kGreater, counter, start_index, /* is64bit= */ false);  // 32-bit copy is enough.
1471 
1472     if (mirror::kUseStringCompression) {
1473       NearLabel modify_counter, offset_uncompressed_label;
1474       __ testl(CpuRegister(TMP), Immediate(1));
1475       __ j(kNotZero, &offset_uncompressed_label);
1476       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1477       __ jmp(&modify_counter);
1478       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1479       __ Bind(&offset_uncompressed_label);
1480       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1481       __ Bind(&modify_counter);
1482     } else {
1483       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1484     }
1485     // Now update RCX, the work counter: it becomes string.length - start_index.
1486     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1487     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
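    // Illustrative example: with string_length == 10 and start_index == 3, counter holds -3 after
    // the negation and the LEA yields 10 + (-3) == 7 characters left to scan.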
1488   }
1489 
1490   if (mirror::kUseStringCompression) {
1491     NearLabel uncompressed_string_comparison;
1492     NearLabel comparison_done;
1493     __ testl(CpuRegister(TMP), Immediate(1));
1494     __ j(kNotZero, &uncompressed_string_comparison);
1495     // Check if RAX (search_value) is ASCII.
1496     __ cmpl(search_value, Immediate(127));
1497     __ j(kGreater, &not_found_label);
1498     // Compare byte by byte.
1499     __ repne_scasb();
1500     __ jmp(&comparison_done);
1501     // Everything is set up for repne scasw:
1502     //   * Comparison address in RDI.
1503     //   * Counter in ECX.
1504     __ Bind(&uncompressed_string_comparison);
1505     __ repne_scasw();
1506     __ Bind(&comparison_done);
1507   } else {
1508     __ repne_scasw();
1509   }
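  // `repne scasw` compares AX (the search value) against the word at [RDI], advancing RDI and
  // decrementing RCX until a match is found or RCX reaches zero; `repne scasb` does the same
  // byte-wise for compressed strings.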
1510   // Did we find a match?
1511   __ j(kNotEqual, &not_found_label);
1512 
1513   // Yes, we matched.  Compute the index of the result.
1514   __ subl(string_length, counter);
1515   __ leal(out, Address(string_length, -1));
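  // Illustrative example: scanning 10 chars and matching at index 3 leaves counter == 6, so
  // string_length - counter == 4 chars were scanned and the result index is 4 - 1 == 3.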
1516 
1517   NearLabel done;
1518   __ jmp(&done);
1519 
1520   // Failed to match; return -1.
1521   __ Bind(&not_found_label);
1522   __ movl(out, Immediate(-1));
1523 
1524   // And join up at the end.
1525   __ Bind(&done);
1526   if (slow_path != nullptr) {
1527     __ Bind(slow_path->GetExitLabel());
1528   }
1529 }
1530 
VisitStringIndexOf(HInvoke * invoke)1531 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1532   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1533 }
1534 
VisitStringIndexOf(HInvoke * invoke)1535 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1536   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1537 }
1538 
VisitStringIndexOfAfter(HInvoke * invoke)1539 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1540   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1541 }
1542 
VisitStringIndexOfAfter(HInvoke * invoke)1543 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1544   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1545 }
1546 
VisitStringNewStringFromBytes(HInvoke * invoke)1547 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1548   LocationSummary* locations = new (allocator_) LocationSummary(
1549       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1550   InvokeRuntimeCallingConvention calling_convention;
1551   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1552   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1553   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1554   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1555   locations->SetOut(Location::RegisterLocation(RAX));
1556 }
1557 
VisitStringNewStringFromBytes(HInvoke * invoke)1558 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1559   X86_64Assembler* assembler = GetAssembler();
1560   LocationSummary* locations = invoke->GetLocations();
1561 
1562   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1563   __ testl(byte_array, byte_array);
1564   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1565   codegen_->AddSlowPath(slow_path);
1566   __ j(kEqual, slow_path->GetEntryLabel());
1567 
1568   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke);
1569   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1570   __ Bind(slow_path->GetExitLabel());
1571 }
1572 
VisitStringNewStringFromChars(HInvoke * invoke)1573 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1574   LocationSummary* locations =
1575       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1576   InvokeRuntimeCallingConvention calling_convention;
1577   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1578   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1579   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1580   locations->SetOut(Location::RegisterLocation(RAX));
1581 }
1582 
VisitStringNewStringFromChars(HInvoke * invoke)1583 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1584   // No need to emit code checking whether `locations->InAt(2)` is a null
1585   // pointer, as callers of the native method
1586   //
1587   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1588   //
1589   // all include a null check on `data` before calling that method.
1590   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke);
1591   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1592 }
1593 
VisitStringNewStringFromString(HInvoke * invoke)1594 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1595   LocationSummary* locations = new (allocator_) LocationSummary(
1596       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1597   InvokeRuntimeCallingConvention calling_convention;
1598   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1599   locations->SetOut(Location::RegisterLocation(RAX));
1600 }
1601 
VisitStringNewStringFromString(HInvoke * invoke)1602 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1603   X86_64Assembler* assembler = GetAssembler();
1604   LocationSummary* locations = invoke->GetLocations();
1605 
1606   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1607   __ testl(string_to_copy, string_to_copy);
1608   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1609   codegen_->AddSlowPath(slow_path);
1610   __ j(kEqual, slow_path->GetEntryLabel());
1611 
1612   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke);
1613   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1614   __ Bind(slow_path->GetExitLabel());
1615 }
1616 
VisitStringGetCharsNoCheck(HInvoke * invoke)1617 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1618   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1619   LocationSummary* locations =
1620       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1621   locations->SetInAt(0, Location::RequiresRegister());
1622   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1623   locations->SetInAt(2, Location::RequiresRegister());
1624   locations->SetInAt(3, Location::RequiresRegister());
1625   locations->SetInAt(4, Location::RequiresRegister());
1626 
1627   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1628   locations->AddTemp(Location::RegisterLocation(RSI));
1629   locations->AddTemp(Location::RegisterLocation(RDI));
1630   locations->AddTemp(Location::RegisterLocation(RCX));
1631 }
1632 
VisitStringGetCharsNoCheck(HInvoke * invoke)1633 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1634   X86_64Assembler* assembler = GetAssembler();
1635   LocationSummary* locations = invoke->GetLocations();
1636 
1637   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1638   // Location of data in char array buffer.
1639   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1640   // Location of char array data in string.
1641   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1642 
1643   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1644   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1645   Location srcBegin = locations->InAt(1);
1646   int srcBegin_value =
1647       srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1648   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1649   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1650   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1651 
1652   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1653   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1654   DCHECK_EQ(char_size, 2u);
1655 
1656   NearLabel done;
1657   // Compute the number of chars (words) to move.
1658   __ movl(CpuRegister(RCX), srcEnd);
1659   if (srcBegin.IsConstant()) {
1660     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1661   } else {
1662     DCHECK(srcBegin.IsRegister());
1663     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1664   }
1665   if (mirror::kUseStringCompression) {
1666     NearLabel copy_uncompressed, copy_loop;
1667     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1668     DCHECK_EQ(c_char_size, 1u);
1669     // Location of count in string.
1670     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1671 
1672     __ testl(Address(obj, count_offset), Immediate(1));
1673     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1674                   "Expecting 0=compressed, 1=uncompressed");
1675     __ j(kNotZero, &copy_uncompressed);
1676     // Compute the address of the source string by adding the number of chars from
1677     // the source beginning to the value offset of a string.
1678     __ leaq(CpuRegister(RSI),
1679             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1680     // Compute the destination address, then start the loop copying the String's value into the char array.
1681     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1682 
1683     __ Bind(&copy_loop);
1684     __ jrcxz(&done);
1685     // Use TMP as temporary (convert byte from RSI to word).
1686     // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1687     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1688     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1689     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1690     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1691     // TODO: Add support for LOOP to X86_64Assembler.
1692     __ subl(CpuRegister(RCX), Immediate(1));
1693     __ jmp(&copy_loop);
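    // Each iteration above widens one compressed (8-bit) character from [RSI] into a 16-bit char
    // at [RDI]; this is a hand-rolled byte-to-word copy rather than a string instruction.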
1694 
1695     __ Bind(&copy_uncompressed);
1696   }
1697 
1698   __ leaq(CpuRegister(RSI),
1699           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1700   // Compute the address of the destination buffer.
1701   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1702   // Do the move.
1703   __ rep_movsw();
1704 
1705   __ Bind(&done);
1706 }
1707 
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1708 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1709   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1710   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1711   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1712   // to avoid a SIGBUS.
1713   switch (size) {
1714     case DataType::Type::kInt8:
1715       __ movsxb(out, Address(address, 0));
1716       break;
1717     case DataType::Type::kInt16:
1718       __ movsxw(out, Address(address, 0));
1719       break;
1720     case DataType::Type::kInt32:
1721       __ movl(out, Address(address, 0));
1722       break;
1723     case DataType::Type::kInt64:
1724       __ movq(out, Address(address, 0));
1725       break;
1726     default:
1727       LOG(FATAL) << "Type not recognized for peek: " << size;
1728       UNREACHABLE();
1729   }
1730 }
1731 
VisitMemoryPeekByte(HInvoke * invoke)1732 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1733   CreateIntToIntLocations(allocator_, invoke);
1734 }
1735 
VisitMemoryPeekByte(HInvoke * invoke)1736 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1737   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1738 }
1739 
VisitMemoryPeekIntNative(HInvoke * invoke)1740 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1741   CreateIntToIntLocations(allocator_, invoke);
1742 }
1743 
VisitMemoryPeekIntNative(HInvoke * invoke)1744 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1745   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1746 }
1747 
VisitMemoryPeekLongNative(HInvoke * invoke)1748 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1749   CreateIntToIntLocations(allocator_, invoke);
1750 }
1751 
VisitMemoryPeekLongNative(HInvoke * invoke)1752 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1753   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1754 }
1755 
VisitMemoryPeekShortNative(HInvoke * invoke)1756 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1757   CreateIntToIntLocations(allocator_, invoke);
1758 }
1759 
VisitMemoryPeekShortNative(HInvoke * invoke)1760 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1761   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1762 }
1763 
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1764 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1765   LocationSummary* locations =
1766       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1767   locations->SetInAt(0, Location::RequiresRegister());
1768   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1769 }
1770 
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1771 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1772   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1773   Location value = locations->InAt(1);
1774   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1775   // to avoid a SIGBUS.
1776   switch (size) {
1777     case DataType::Type::kInt8:
1778       if (value.IsConstant()) {
1779         __ movb(Address(address, 0),
1780                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1781       } else {
1782         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1783       }
1784       break;
1785     case DataType::Type::kInt16:
1786       if (value.IsConstant()) {
1787         __ movw(Address(address, 0),
1788                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1789       } else {
1790         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1791       }
1792       break;
1793     case DataType::Type::kInt32:
1794       if (value.IsConstant()) {
1795         __ movl(Address(address, 0),
1796                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1797       } else {
1798         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1799       }
1800       break;
1801     case DataType::Type::kInt64:
1802       if (value.IsConstant()) {
1803         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1804         DCHECK(IsInt<32>(v));
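        // A 64-bit store of an immediate only accepts a sign-extended 32-bit value, hence the
        // check that the constant fits in int32; larger constants would need to go through a
        // register first.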
1805         int32_t v_32 = v;
1806         __ movq(Address(address, 0), Immediate(v_32));
1807       } else {
1808         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1809       }
1810       break;
1811     default:
1812       LOG(FATAL) << "Type not recognized for poke: " << size;
1813       UNREACHABLE();
1814   }
1815 }
1816 
VisitMemoryPokeByte(HInvoke * invoke)1817 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1818   CreateIntIntToVoidLocations(allocator_, invoke);
1819 }
1820 
VisitMemoryPokeByte(HInvoke * invoke)1821 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1822   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1823 }
1824 
VisitMemoryPokeIntNative(HInvoke * invoke)1825 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1826   CreateIntIntToVoidLocations(allocator_, invoke);
1827 }
1828 
VisitMemoryPokeIntNative(HInvoke * invoke)1829 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1830   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1831 }
1832 
VisitMemoryPokeLongNative(HInvoke * invoke)1833 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1834   CreateIntIntToVoidLocations(allocator_, invoke);
1835 }
1836 
VisitMemoryPokeLongNative(HInvoke * invoke)1837 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1838   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1839 }
1840 
VisitMemoryPokeShortNative(HInvoke * invoke)1841 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1842   CreateIntIntToVoidLocations(allocator_, invoke);
1843 }
1844 
VisitMemoryPokeShortNative(HInvoke * invoke)1845 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1846   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1847 }
1848 
VisitThreadCurrentThread(HInvoke * invoke)1849 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1850   LocationSummary* locations =
1851       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1852   locations->SetOut(Location::RequiresRegister());
1853 }
1854 
VisitThreadCurrentThread(HInvoke * invoke)1855 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1856   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1857   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1858                                                     /* no_rip= */ true));
1859 }
1860 
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1861 static void GenUnsafeGet(HInvoke* invoke,
1862                          DataType::Type type,
1863                          [[maybe_unused]] bool is_volatile,
1864                          CodeGeneratorX86_64* codegen) {
1865   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1866   LocationSummary* locations = invoke->GetLocations();
1867   Location base_loc = locations->InAt(1);
1868   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1869   Location offset_loc = locations->InAt(2);
1870   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1871   Location output_loc = locations->Out();
1872   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1873 
1874   switch (type) {
1875     case DataType::Type::kInt8:
1876       __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1877       break;
1878 
1879     case DataType::Type::kInt32:
1880       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1881       break;
1882 
1883     case DataType::Type::kReference: {
1884       if (codegen->EmitReadBarrier()) {
1885         if (kUseBakerReadBarrier) {
1886           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1887           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1888               invoke, output_loc, base, src, /* needs_null_check= */ false);
1889         } else {
1890           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1891           codegen->GenerateReadBarrierSlow(
1892               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1893         }
1894       } else {
1895         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1896         __ MaybeUnpoisonHeapReference(output);
1897       }
1898       break;
1899     }
1900 
1901     case DataType::Type::kInt64:
1902       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1903       break;
1904 
1905     default:
1906       LOG(FATAL) << "Unsupported op size " << type;
1907       UNREACHABLE();
1908   }
1909 }
1910 
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen)1911 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1912                                  DataType::Type type,
1913                                  CodeGeneratorX86_64* codegen) {
1914   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1915   LocationSummary* locations = invoke->GetLocations();
1916   Location address_loc = locations->InAt(1);
1917   Address address = Address(address_loc.AsRegister<CpuRegister>(), 0);
1918   Location output_loc = locations->Out();
1919   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1920 
1921   switch (type) {
1922     case DataType::Type::kInt8:
1923       __ movsxb(output, address);
1924       break;
1925 
1926     case DataType::Type::kInt32:
1927       __ movl(output, address);
1928       break;
1929 
1930     case DataType::Type::kInt64:
1931       __ movq(output, address);
1932       break;
1933 
1934     default:
1935       LOG(FATAL) << "Unsupported op size " << type;
1936       UNREACHABLE();
1937   }
1938 }
1939 
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)1940 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1941   LocationSummary* locations =
1942       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1943   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1944   locations->SetInAt(1, Location::RequiresRegister());
1945   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1946 }
1947 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1948 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1949                                           HInvoke* invoke,
1950                                           CodeGeneratorX86_64* codegen) {
1951   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1952   LocationSummary* locations =
1953       new (allocator) LocationSummary(invoke,
1954                                       can_call
1955                                           ? LocationSummary::kCallOnSlowPath
1956                                           : LocationSummary::kNoCall,
1957                                       kIntrinsified);
1958   if (can_call && kUseBakerReadBarrier) {
1959     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1960   }
1961   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1962   locations->SetInAt(1, Location::RequiresRegister());
1963   locations->SetInAt(2, Location::RequiresRegister());
1964   locations->SetOut(Location::RequiresRegister(),
1965                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1966 }
1967 
VisitUnsafeGet(HInvoke * invoke)1968 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1969   VisitJdkUnsafeGet(invoke);
1970 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1971 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1972   VisitJdkUnsafeGetAbsolute(invoke);
1973 }
VisitUnsafeGetVolatile(HInvoke * invoke)1974 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1975   VisitJdkUnsafeGetVolatile(invoke);
1976 }
VisitUnsafeGetLong(HInvoke * invoke)1977 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1978   VisitJdkUnsafeGetLong(invoke);
1979 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1980 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1981   VisitJdkUnsafeGetLongVolatile(invoke);
1982 }
VisitUnsafeGetObject(HInvoke * invoke)1983 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1984   VisitJdkUnsafeGetReference(invoke);
1985 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1986 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1987   VisitJdkUnsafeGetReferenceVolatile(invoke);
1988 }
VisitUnsafeGetByte(HInvoke * invoke)1989 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1990   VisitJdkUnsafeGetByte(invoke);
1991 }
1992 
VisitJdkUnsafeGet(HInvoke * invoke)1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1994   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1995 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1997   CreateIntIntToIntLocations(allocator_, invoke);
1998 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2000   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2001 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2003   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2004 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2006   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2007 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2009   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2010 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2012   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2013 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2015   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2016 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2018   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2019 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2021   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2022 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2024   CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2025 }
2026 
VisitUnsafeGet(HInvoke * invoke)2027 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2028   VisitJdkUnsafeGet(invoke);
2029 }
VisitUnsafeGetAbsolute(HInvoke * invoke)2030 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2031   VisitJdkUnsafeGetAbsolute(invoke);
2032 }
VisitUnsafeGetVolatile(HInvoke * invoke)2033 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2034   VisitJdkUnsafeGetVolatile(invoke);
2035 }
VisitUnsafeGetLong(HInvoke * invoke)2036 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2037   VisitJdkUnsafeGetLong(invoke);
2038 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2039 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2040   VisitJdkUnsafeGetLongVolatile(invoke);
2041 }
VisitUnsafeGetObject(HInvoke * invoke)2042 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2043   VisitJdkUnsafeGetReference(invoke);
2044 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2045 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2046   VisitJdkUnsafeGetReferenceVolatile(invoke);
2047 }
VisitUnsafeGetByte(HInvoke * invoke)2048 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
2049   VisitJdkUnsafeGetByte(invoke);
2050 }
2051 
VisitJdkUnsafeGet(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2053   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2054 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2056   GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, codegen_);
2057 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2059   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2060 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2061 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2062   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2063 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2064 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2065   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2066 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2067 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2068   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2069 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2070 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2071   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2072 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2073 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2074   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2075 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2076 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2077   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2078 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2079 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2080   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2081 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2082 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2083   GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2084 }
2085 
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2086 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2087                                                     [[maybe_unused]] DataType::Type type,
2088                                                     HInvoke* invoke) {
2089   LocationSummary* locations =
2090       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2091   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2092   locations->SetInAt(1, Location::RequiresRegister());
2093   locations->SetInAt(2, Location::RequiresRegister());
2094 }
2095 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2096 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2097                                                        DataType::Type type,
2098                                                        HInvoke* invoke) {
2099   LocationSummary* locations =
2100       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2101   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2102   locations->SetInAt(1, Location::RequiresRegister());
2103   locations->SetInAt(2, Location::RequiresRegister());
2104   locations->SetInAt(3, Location::RequiresRegister());
2105   if (type == DataType::Type::kReference) {
2106     // Need temp registers for card-marking.
2107     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2108     locations->AddTemp(Location::RequiresRegister());
2109   }
2110 }
2111 
VisitUnsafePut(HInvoke * invoke)2112 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2113   VisitJdkUnsafePut(invoke);
2114 }
VisitUnsafePutAbsolute(HInvoke * invoke)2115 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2116   VisitJdkUnsafePutAbsolute(invoke);
2117 }
VisitUnsafePutOrderedInt(HInvoke * invoke)2118 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
2119   VisitJdkUnsafePutOrderedInt(invoke);
2120 }
VisitUnsafePutVolatile(HInvoke * invoke)2121 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2122   VisitJdkUnsafePutVolatile(invoke);
2123 }
VisitUnsafePutObject(HInvoke * invoke)2124 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2125   VisitJdkUnsafePutReference(invoke);
2126 }
VisitUnsafePutOrderedObject(HInvoke * invoke)2127 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
2128   VisitJdkUnsafePutOrderedObject(invoke);
2129 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2130 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2131   VisitJdkUnsafePutReferenceVolatile(invoke);
2132 }
VisitUnsafePutLong(HInvoke * invoke)2133 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2134   VisitJdkUnsafePutLong(invoke);
2135 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2136 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2137   VisitJdkUnsafePutLongOrdered(invoke);
2138 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2139 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2140   VisitJdkUnsafePutLongVolatile(invoke);
2141 }
VisitUnsafePutByte(HInvoke * invoke)2142 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2143   VisitJdkUnsafePut(invoke);
2144 }
2145 
VisitJdkUnsafePut(HInvoke * invoke)2146 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2147   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2148 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2149 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2150   CreateIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2151 }
VisitJdkUnsafePutOrderedInt(HInvoke * invoke)2152 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
2153   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2154 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2155 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2156   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2157 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2159   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2160 }
VisitJdkUnsafePutReference(HInvoke * invoke)2161 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2162   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2163 }
VisitJdkUnsafePutOrderedObject(HInvoke * invoke)2164 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
2165   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2166 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2167 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2168   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2169 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2170 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2171   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2172 }
VisitJdkUnsafePutLong(HInvoke * invoke)2173 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2174   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2175 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2176 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2177   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2178 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2179 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2180   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2181 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2182 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2183   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2184 }
VisitJdkUnsafePutByte(HInvoke * invoke)2185 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2186   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2187 }
2188 
2189 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2190 // memory model.
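// On x86-64 every ordinary store already has release semantics, so an ordered/release put needs no
// extra fence; only the volatile case emits an explicit MemoryFence below.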
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2191 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2192                          CodeGeneratorX86_64* codegen) {
2193   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2194   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2195   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2196   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2197 
2198   if (type == DataType::Type::kInt64) {
2199     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2200   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2201     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2202     __ movl(temp, value);
2203     __ PoisonHeapReference(temp);
2204     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2205   } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2206     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2207   } else {
2208     CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2209     __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2210   }
2211 
2212   if (is_volatile) {
2213     codegen->MemoryFence();
2214   }
2215 
2216   if (type == DataType::Type::kReference) {
2217     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2218     codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2219                              locations->GetTemp(1).AsRegister<CpuRegister>(),
2220                              base,
2221                              value,
2222                              value_can_be_null);
2223   }
2224 }
2225 
2226 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2227 // memory model.
static void GenUnsafePutAbsolute(LocationSummary* locations,
                                 DataType::Type type,
                                 bool is_volatile,
                                 CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister address_reg = locations->InAt(1).AsRegister<CpuRegister>();
  Address address = Address(address_reg, 0);
  CpuRegister value = locations->InAt(2).AsRegister<CpuRegister>();

  if (type == DataType::Type::kInt64) {
    __ movq(address, value);
  } else if (type == DataType::Type::kInt32) {
    __ movl(address, value);
  } else {
    CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
    __ movb(address, value);
  }

  if (is_volatile) {
    codegen->MemoryFence();
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  VisitJdkUnsafePut(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
  VisitJdkUnsafePutAbsolute(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
  VisitJdkUnsafePutOrderedInt(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
  VisitJdkUnsafePutOrderedObject(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  VisitJdkUnsafePutLong(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  VisitJdkUnsafePutLongOrdered(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  VisitJdkUnsafePutLongVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
  VisitJdkUnsafePutByte(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
  GenUnsafePutAbsolute(
      invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
  GenUnsafePut(
      invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
  GenUnsafePut(
      invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
  GenUnsafePut(
      invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
  GenUnsafePut(
      invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
}

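// Locations for Unsafe.compareAndSet*(obj, offset, expected, newValue): the receiver (input 0) is
// unused, inputs 1-4 are the object, the offset, the expected value and the new value. CMPXCHG
// requires the expected value in RAX and also writes the old value back to RAX, so input 3 and
// the output share that register.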
static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
                                     HInvoke* invoke,
                                     CodeGeneratorX86_64* codegen,
                                     DataType::Type type) {
  const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  // RAX is clobbered in CMPXCHG, but we set it as out so no need to add it as temporary.
  locations->SetOut(Location::RegisterLocation(RAX));

  if (type == DataType::Type::kReference) {
    // Need two temporaries for MarkGCCard.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
    locations->AddTemp(Location::RequiresRegister());
    if (codegen->EmitReadBarrier()) {
      // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
      DCHECK(kUseBakerReadBarrier);
      locations->AddTemp(Location::RequiresRegister());
    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  VisitJdkUnsafeCASInt(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  VisitJdkUnsafeCASLong(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  VisitJdkUnsafeCASObject(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetInt(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetLong(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetReference(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
  CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
  CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
  // The only supported read barrier implementation is the Baker-style read barriers.
  if (codegen_->EmitNonBakerReadBarrier()) {
    return;
  }

  CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
}

// Convert ZF into the Boolean result.
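// LOCK CMPXCHG sets ZF if and only if the expected value matched and the exchange was performed,
// so SETZ followed by a zero-extending move materializes the boolean success result.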
static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

// This function assumes that expected value for CMPXCHG and output are in RAX.
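// `byte_swap` is set when the field is accessed in non-native byte order (e.g. through a byte
// array view VarHandle): both the expected value in RAX and the new value are byte-swapped before
// the CMPXCHG; the new value is restored afterwards and, for compare-and-exchange, the old value
// loaded into RAX is swapped back as well.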
static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
                                          DataType::Type type,
                                          Address field_addr,
                                          Location value,
                                          bool is_cmpxchg,
                                          bool byte_swap) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
  InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();

  if (byte_swap) {
    instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
    instr_codegen->Bswap(value, type);
  }

  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kInt8:
      __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
      break;
    case DataType::Type::kInt16:
    case DataType::Type::kUint16:
      __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kUint32:
      __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kUint64:
      __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
      break;
    default:
      LOG(FATAL) << "Unexpected non-integral CAS type " << type;
  }
  // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.

  if (byte_swap) {
    // Restore byte order for value.
    instr_codegen->Bswap(value, type);
  }

  CpuRegister rax(RAX);
  if (is_cmpxchg) {
    if (byte_swap) {
      instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
    }
    // Sign-extend or zero-extend the result as necessary.
    switch (type) {
      case DataType::Type::kBool:
        __ movzxb(rax, rax);
        break;
      case DataType::Type::kInt8:
        __ movsxb(rax, rax);
        break;
      case DataType::Type::kInt16:
        __ movsxw(rax, rax);
        break;
      case DataType::Type::kUint16:
        __ movzxw(rax, rax);
        break;
      default:
        break;  // No need to do anything.
    }
  } else {
    GenZFlagToResult(assembler, rax);
  }
}

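// Floating-point CAS operates on the raw bits: `expected` is copied to RAX and `value` to an
// integer temporary, the integer LOCK CMPXCHG does the work, and for compare-and-exchange the old
// bits left in RAX are moved back into an XMM register with MoveIntToFP.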
static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
                                         Address field_addr,
                                         CpuRegister temp,
                                         Location value,
                                         Location expected,
                                         Location out,
                                         bool is64bit,
                                         bool is_cmpxchg,
                                         bool byte_swap) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
  InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();

  Location rax_loc = Location::RegisterLocation(RAX);
  Location temp_loc = Location::RegisterLocation(temp.AsRegister());

  DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;

  // Copy `expected` to RAX (required by the CMPXCHG instruction).
  codegen->Move(rax_loc, expected);

  // Copy value to some other register (ensure it's not RAX).
  DCHECK_NE(temp.AsRegister(), RAX);
  codegen->Move(temp_loc, value);

  if (byte_swap) {
    instr_codegen->Bswap(rax_loc, type);
    instr_codegen->Bswap(temp_loc, type);
  }

  if (is64bit) {
    __ LockCmpxchgq(field_addr, temp);
  } else {
    __ LockCmpxchgl(field_addr, temp);
  }
  // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
  // No need to restore byte order for temporary register.

  if (is_cmpxchg) {
    if (byte_swap) {
      instr_codegen->Bswap(rax_loc, type);
    }
    MoveIntToFP(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit, assembler);
  } else {
    GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
  }
}

// This function assumes that expected value for CMPXCHG and output are in RAX.
static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
                                          HInvoke* invoke,
                                          CpuRegister base,
                                          CpuRegister offset,
                                          CpuRegister value,
                                          CpuRegister temp1,
                                          CpuRegister temp2,
                                          CpuRegister temp3,
                                          bool is_cmpxchg) {
  // The only supported read barrier implementation is the Baker-style read barriers.
  DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);

  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());

  // Mark card for object assuming new value is stored.
  bool value_can_be_null = true;  // TODO: Worth finding out this information?
  codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);

  Address field_addr(base, offset, TIMES_1, 0);
  if (codegen->EmitBakerReadBarrier()) {
    // Need to make sure the reference stored in the field is a to-space
    // one before attempting the CAS or the CAS could fail incorrectly.
    codegen->GenerateReferenceLoadWithBakerReadBarrier(
        invoke,
        Location::RegisterLocation(temp3.AsRegister()),
        base,
        field_addr,
        /* needs_null_check= */ false,
        /* always_update_field= */ true,
        &temp1,
        &temp2);
  } else {
    // Nothing to do, the value will be loaded into the out register by CMPXCHG.
  }

  bool base_equals_value = (base.AsRegister() == value.AsRegister());
  Register value_reg = value.AsRegister();
  if (kPoisonHeapReferences) {
    if (base_equals_value) {
      // If `base` and `value` are the same register location, move `value_reg` to a temporary
      // register.  This way, poisoning `value_reg` won't invalidate `base`.
      value_reg = temp1.AsRegister();
      __ movl(CpuRegister(value_reg), base);
    }

    // Check that the register allocator did not assign the location of expected value (RAX) to
    // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
    // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
    //   not be poisoned at all, as heap poisoning uses address negation.
    // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
    DCHECK_NE(RAX, value_reg);
    DCHECK_NE(RAX, base.AsRegister());

    __ PoisonHeapReference(CpuRegister(RAX));
    __ PoisonHeapReference(CpuRegister(value_reg));
  }

  __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
  // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.

  if (is_cmpxchg) {
    // Output is in RAX, so we can rely on CMPXCHG and do nothing.
    __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
  } else {
    GenZFlagToResult(assembler, CpuRegister(RAX));
  }

  // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
  if (kPoisonHeapReferences) {
    if (base_equals_value) {
      // `value_reg` has been moved to a temporary register, no need to unpoison it.
    } else {
      // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
      DCHECK_NE(RAX, value_reg);
      __ UnpoisonHeapReference(CpuRegister(value_reg));
    }
  }
}

// In debug mode, return true if all registers are pairwise different. In release mode, do nothing
// and always return true.
static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
  if (kIsDebugBuild) {
    for (size_t i = 0; i < regs.size(); ++i) {
      for (size_t j = 0; j < i; ++j) {
        if (regs[i].AsRegister() == regs[j].AsRegister()) {
          return false;
        }
      }
    }
  }
  return true;
}

// GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
// temporary indices that may not correspond to real registers for code paths that do not use them.
static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
                                       HInvoke* invoke,
                                       DataType::Type type,
                                       CpuRegister base,
                                       CpuRegister offset,
                                       uint32_t temp1_index,
                                       uint32_t temp2_index,
                                       uint32_t temp3_index,
                                       Location new_value,
                                       Location expected,
                                       Location out,
                                       bool is_cmpxchg,
                                       bool byte_swap) {
  LocationSummary* locations = invoke->GetLocations();
  Address field_address(base, offset, TIMES_1, 0);

  if (DataType::IsFloatingPointType(type)) {
    bool is64bit = (type == DataType::Type::kFloat64);
    CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
    DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));

    GenCompareAndSetOrExchangeFP(
        codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
  } else {
    // Both the expected value for CMPXCHG and the output are in RAX.
    DCHECK_EQ(RAX, expected.AsRegister<Register>());
    DCHECK_EQ(RAX, out.AsRegister<Register>());

    if (type == DataType::Type::kReference) {
      CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
      CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
      CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
      CpuRegister temp3 = codegen->EmitReadBarrier()
          ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
          : CpuRegister(kNoRegister);
      DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));

      DCHECK(!byte_swap);
      GenCompareAndSetOrExchangeRef(
          codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
    } else {
      GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
    }
  }
}

static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  GenCompareAndSetOrExchange(codegen,
                             invoke,
                             type,
                             /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
                             /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
                             /*temp1_index=*/ 0,
                             /*temp2_index=*/ 1,
                             /*temp3_index=*/ 2,
                             /*new_value=*/ locations->InAt(4),
                             /*expected=*/ locations->InAt(3),
                             locations->Out(),
                             /*is_cmpxchg=*/ false,
                             /*byte_swap=*/ false);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  VisitJdkUnsafeCASInt(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  VisitJdkUnsafeCASLong(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  VisitJdkUnsafeCASObject(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetInt(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetLong(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
  // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
  VisitJdkUnsafeCompareAndSetReference(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
  GenCAS(DataType::Type::kInt32, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
  GenCAS(DataType::Type::kInt64, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
  // The only supported read barrier implementation is the Baker-style read barriers.
  DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);

  GenCAS(DataType::Type::kReference, invoke, codegen_);
}

static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
                                              HInvoke* invoke,
                                              CodeGeneratorX86_64* codegen) {
  const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // Use the same register for both the output and the new value or addend
  // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  // Only set the `out` register if it's needed. In the void case we can still use RAX in the
  // same manner as it is marked as a temp register.
  if (invoke->GetType() == DataType::Type::kVoid) {
    locations->AddTemp(Location::RegisterLocation(RAX));
  } else {
    locations->SetOut(Location::RegisterLocation(RAX));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
  VisitJdkUnsafeGetAndAddInt(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
  VisitJdkUnsafeGetAndAddLong(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetInt(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetLong(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetReference(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
  // The only supported read barrier implementation is the Baker-style read barriers.
  if (codegen_->EmitNonBakerReadBarrier()) {
    return;
  }

  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
  invoke->GetLocations()->AddRegisterTemps(3);
}

enum class GetAndUpdateOp {
  kSet,
  kAdd,
  kBitwiseAnd,
  kBitwiseOr,
  kBitwiseXor
};

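// Only kSet and kAdd are generated for the Unsafe intrinsics below: kAdd maps to LOCK XADD
// (fetch-and-add) and kSet maps to XCHG, which is implicitly locked when it has a memory operand.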
static void GenUnsafeGetAndUpdate(HInvoke* invoke,
                                  DataType::Type type,
                                  CodeGeneratorX86_64* codegen,
                                  GetAndUpdateOp get_and_update_op) {
  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  const bool is_void = invoke->GetType() == DataType::Type::kVoid;
  Location rax_loc = Location::RegisterLocation(RAX);
  // We requested RAX to use as a temporary for void methods, as we don't return the value.
  DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
  CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>();           // Result.
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();       // Object pointer.
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();     // Long offset.
  DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>());  // New value or addend.
  Address field_address(base, offset, TIMES_1, 0);

  if (type == DataType::Type::kInt32) {
    if (get_and_update_op == GetAndUpdateOp::kAdd) {
      __ LockXaddl(field_address, out_or_temp);
    } else {
      DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
      __ xchgl(out_or_temp, field_address);
    }
  } else if (type == DataType::Type::kInt64) {
    if (get_and_update_op == GetAndUpdateOp::kAdd) {
      __ LockXaddq(field_address, out_or_temp);
    } else {
      DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
      __ xchgq(out_or_temp, field_address);
    }
  } else {
    DCHECK_EQ(type, DataType::Type::kReference);
    DCHECK(get_and_update_op == GetAndUpdateOp::kSet);

    // In the void case, we have an extra temp register, which is used to signal the register
    // allocator that we are clobbering RAX.
    const uint32_t extra_temp = is_void ? 1u : 0u;
    DCHECK_EQ(locations->GetTempCount(), 3u + extra_temp);
    DCHECK_IMPLIES(is_void, locations->GetTemp(0u).Equals(Location::RegisterLocation(RAX)));

    CpuRegister temp1 = locations->GetTemp(0u + extra_temp).AsRegister<CpuRegister>();
    CpuRegister temp2 = locations->GetTemp(1u + extra_temp).AsRegister<CpuRegister>();
    CpuRegister temp3 = locations->GetTemp(2u + extra_temp).AsRegister<CpuRegister>();

    if (codegen->EmitReadBarrier()) {
      DCHECK(kUseBakerReadBarrier);
      // Ensure that the field contains a to-space reference.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          Location::RegisterLocation(temp3.AsRegister()),
          base,
          field_address,
          /*needs_null_check=*/ false,
          /*always_update_field=*/ true,
          &temp1,
          &temp2);
    }

    // Mark card for object as a new value shall be stored.
    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null);

    if (kPoisonHeapReferences) {
      // Use a temp to avoid poisoning base of the field address, which might happen if `out`
      // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
      __ movl(temp1, out_or_temp);
      __ PoisonHeapReference(temp1);
      __ xchgl(temp1, field_address);
      if (!is_void) {
        __ UnpoisonHeapReference(temp1);
        __ movl(out_or_temp, temp1);
      }
    } else {
      __ xchgl(out_or_temp, field_address);
    }
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
  VisitJdkUnsafeGetAndAddInt(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
  VisitJdkUnsafeGetAndAddLong(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetInt(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetLong(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
  VisitJdkUnsafeGetAndSetReference(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
  GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

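// Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps each pair of
// adjacent bit groups of width `shift` selected by `mask`.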
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
   * bit swapping to reverse the bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
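  // For example, Integer.reverse(0x00000001) == 0x80000000 and
  // Integer.reverse(0x0000FF00) == 0x00FF0000.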
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddRegisterTemps(2);
}

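// Same bit-group swap as SwapBits, but on 64-bit values. The mask is materialized in `temp_mask`
// because x86-64 AND immediates are at most 32 bits (sign-extended), so a 64-bit mask cannot be
// encoded directly.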
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first and then use 3 rounds of
   * bit swapping to reverse the bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which has 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

static void CreateBitCountLocations(
    ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen,
                        HInvoke* invoke,
                        bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  if (src.IsRegister()) {
    if (is_long) {
      __ popcntq(out, src.AsRegister<CpuRegister>());
    } else {
      __ popcntl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());  // any will do
}

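// Implements Integer/Long.highestOneBit and lowestOneBit. For `is_high`, the result is
// 1 << BSR(x) (or 0 for a zero input); otherwise it is the isolated lowest bit x & -x, emitted as
// BLSI when the input is in a register and the instruction set features allow it.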
static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1ULL << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
      src.IsRegister()) {
      __ blsi(out, src.AsRegister<CpuRegister>());
  } else {
    CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
    if (is_high) {
      // Use architectural support: basically 1 << bsr.
      if (src.IsRegister()) {
        if (is_long) {
          __ bsrq(tmp, src.AsRegister<CpuRegister>());
        } else {
          __ bsrl(tmp, src.AsRegister<CpuRegister>());
        }
      } else if (is_long) {
        DCHECK(src.IsDoubleStackSlot());
        __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      } else {
        DCHECK(src.IsStackSlot());
        __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      }
      // BSR sets ZF if the input was zero.
      NearLabel is_zero, done;
      __ j(kEqual, &is_zero);
      __ movl(out, Immediate(1));  // Clears upper bits too.
      if (is_long) {
        __ shlq(out, tmp);
      } else {
        __ shll(out, tmp);
      }
      __ jmp(&done);
      __ Bind(&is_zero);
      __ xorl(out, out);  // Clears upper bits too.
      __ Bind(&done);
    } else  {
      // Copy input into temporary.
      if (src.IsRegister()) {
        if (is_long) {
          __ movq(tmp, src.AsRegister<CpuRegister>());
        } else {
          __ movl(tmp, src.AsRegister<CpuRegister>());
        }
      } else if (is_long) {
        DCHECK(src.IsDoubleStackSlot());
        __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      } else {
        DCHECK(src.IsStackSlot());
        __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
      }
      // Do the bit twiddling: basically tmp & -tmp;
      if (is_long) {
        __ movq(out, tmp);
        __ negq(tmp);
        __ andq(out, tmp);
      } else {
        __ movl(out, tmp);
        __ negl(tmp);
        __ andl(out, tmp);
      }
    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
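  // For a nonzero input, CLZ(x) = (bits - 1) - BSR(x); since BSR(x) lies in [0, bits - 1],
  // subtracting it from bits - 1 is the same as XOR-ing it with bits - 1.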
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

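// Expands the locations builder and code generator for the java.lang.<Type>.valueOf() intrinsic
// of every boxed type listed in BOXED_TYPES, reusing the boot image cache for values in the range
// [low, high] and allocating a new boxed instance otherwise.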
3358 #define VISIT_INTRINSIC(name, low, high, type, start_index)                              \
3359   void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) {          \
3360     InvokeRuntimeCallingConvention calling_convention;                                   \
3361     IntrinsicVisitor::ComputeValueOfLocations(                                           \
3362         invoke,                                                                          \
3363         codegen_,                                                                        \
3364         low,                                                                             \
3365         (high) - (low) + 1,                                                              \
3366         Location::RegisterLocation(RAX),                                                 \
3367         Location::RegisterLocation(calling_convention.GetRegisterAt(0)));                \
3368   }                                                                                      \
3369   void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) {             \
3370     IntrinsicVisitor::ValueOfInfo info =                                                 \
3371         IntrinsicVisitor::ComputeValueOfInfo(invoke,                                     \
3372                                              codegen_->GetCompilerOptions(),             \
3373                                              WellKnownClasses::java_lang_##name##_value, \
3374                                              low,                                        \
3375                                              (high) - (low) + 1,                         \
3376                                              start_index);                               \
3377     HandleValueOf(invoke, info, type);                                                   \
3378   }
3379   BOXED_TYPES(VISIT_INTRINSIC)
3380 #undef VISIT_INTRINSIC
3381 
3382 template <typename T>
3383 static void Store(X86_64Assembler* assembler,
3384                   DataType::Type primitive_type,
3385                   const Address& address,
3386                   const T& operand) {
3387   switch (primitive_type) {
3388     case DataType::Type::kInt8:
3389     case DataType::Type::kUint8: {
3390       __ movb(address, operand);
3391       break;
3392     }
3393     case DataType::Type::kInt16:
3394     case DataType::Type::kUint16: {
3395       __ movw(address, operand);
3396       break;
3397     }
3398     case DataType::Type::kInt32: {
3399       __ movl(address, operand);
3400       break;
3401     }
3402     default: {
3403       LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3404     }
3405   }
3406 }
3407 
3408 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3409                                                  const IntrinsicVisitor::ValueOfInfo& info,
3410                                                  DataType::Type type) {
3411   LocationSummary* locations = invoke->GetLocations();
3412   X86_64Assembler* assembler = GetAssembler();
3413 
3414   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3415   InvokeRuntimeCallingConvention calling_convention;
3416   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3417   auto allocate_instance = [&]() {
3418     codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3419     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke);
3420     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3421   };
3422   if (invoke->InputAt(0)->IsIntConstant()) {
3423     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3424     if (static_cast<uint32_t>(value - info.low) < info.length) {
3425       // Just embed the object in the code.
3426       DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3427       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3428     } else {
3429       DCHECK(locations->CanCall());
3430       // Allocate and initialize a new object.
3431       // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3432       // JIT object table.
3433       allocate_instance();
3434       Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3435     }
3436   } else {
3437     DCHECK(locations->CanCall());
3438     CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3439     // Check bounds of our cache.
3440     __ leal(out, Address(in, -info.low));
3441     __ cmpl(out, Immediate(info.length));
3442     NearLabel allocate, done;
3443     __ j(kAboveEqual, &allocate);
3444     // If the value is within the bounds, load the boxed value directly from the array.
3445     DCHECK_NE(out.AsRegister(), argument.AsRegister());
3446     codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3447     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3448                   "Check heap reference size.");
3449     __ movl(out, Address(argument, out, TIMES_4, 0));
3450     __ MaybeUnpoisonHeapReference(out);
3451     __ jmp(&done);
3452     __ Bind(&allocate);
3453     // Otherwise allocate and initialize a new object.
3454     allocate_instance();
3455     Store(assembler, type, Address(out, info.value_offset), in);
3456     __ Bind(&done);
3457   }
3458 }
3459 
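// Illustrative sketch (not part of the ART sources) of the boxing-cache logic that
// HandleValueOf() compiles. The cache array and its size are hypothetical stand-ins for the
// boot-image cache referenced via `info.array_data_boot_image_reference`; the unsigned
// subtraction reproduces the LEAL/CMPL/JAE range check above.
struct SketchBoxed { int32_t value; };
static SketchBoxed gSketchBoxedCache[256];  // Stand-in; assumes length <= 256 for the sketch.
static inline SketchBoxed* SketchValueOf(int32_t value, int32_t low, uint32_t length) {
  uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low);
  if (index < length) {
    return &gSketchBoxedCache[index];  // In range: reuse the pre-allocated boxed object.
  }
  SketchBoxed* obj = new SketchBoxed();  // Out of range: allocate and store the value.
  obj->value = value;
  return obj;
}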
3460 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3461   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3462 }
3463 
3464 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3465   X86_64Assembler* assembler = GetAssembler();
3466   LocationSummary* locations = invoke->GetLocations();
3467 
3468   Location obj = locations->InAt(0);
3469   Location out = locations->Out();
3470 
3471   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3472   codegen_->AddSlowPath(slow_path);
3473 
3474   if (codegen_->EmitReadBarrier()) {
3475     // Check self->GetWeakRefAccessEnabled().
3476     ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3477     __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3478                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3479     __ j(kNotEqual, slow_path->GetEntryLabel());
3480   }
3481 
3482   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3483   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3484 
3485   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3486   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3487   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3488   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3489             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3490   __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3491           Immediate(0));
3492   __ j(kNotEqual, slow_path->GetEntryLabel());
3493 
3494   // Load the value from the field.
3495   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3496   if (codegen_->EmitBakerReadBarrier()) {
3497     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3498                                                     out,
3499                                                     obj.AsRegister<CpuRegister>(),
3500                                                     referent_offset,
3501                                                     /*needs_null_check=*/ true);
3502     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3503     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3504   } else {
3505     __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3506     codegen_->MaybeRecordImplicitNullCheck(invoke);
3507     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3508     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3509     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3510   }
3511   __ Bind(slow_path->GetExitLabel());
3512 }
3513 
3514 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3515   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3516 }
3517 
3518 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3519   X86_64Assembler* assembler = GetAssembler();
3520   LocationSummary* locations = invoke->GetLocations();
3521 
3522   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3523   CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3524   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3525 
3526   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3527   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3528 
3529   __ movl(out, Address(obj, referent_offset));
3530   codegen_->MaybeRecordImplicitNullCheck(invoke);
3531   __ MaybeUnpoisonHeapReference(out);
3532   // Note that the fence is a no-op, thanks to the x86-64 memory model.
3533   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3534 
3535   __ cmpl(out, other);
3536 
3537   if (codegen_->EmitReadBarrier()) {
3538     DCHECK(kUseBakerReadBarrier);
3539 
3540     NearLabel calculate_result;
3541     __ j(kEqual, &calculate_result);  // ZF set if taken.
3542 
3543     // Check if the loaded reference is null in a way that leaves ZF clear for null.
3544     __ cmpl(out, Immediate(1));
3545     __ j(kBelow, &calculate_result);  // ZF clear if taken.
3546 
3547     // For correct memory visibility, we need a barrier before loading the lock word, but
3548     // the barrier already emitted for the volatile load above is sufficient.
3549 
3550     // Load the lockword and check if it is a forwarding address.
3551     static_assert(LockWord::kStateShift == 30u);
3552     static_assert(LockWord::kStateForwardingAddress == 3u);
3553     __ movl(out, Address(out, monitor_offset));
3554     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3555     __ j(kBelow, &calculate_result);   // ZF clear if taken.
3556 
3557     // Extract the forwarding address and compare with `other`.
3558     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3559     __ cmpl(out, other);
3560 
3561     __ Bind(&calculate_result);
3562   }
3563 
3564   // Convert ZF into the Boolean result.
3565   __ setcc(kEqual, out);
3566   __ movzxb(out, out);
3567 }
3568 
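// Illustrative sketch (not part of the ART sources) of the comparison above. With Baker read
// barriers the loaded referent may be a from-space pointer, so a mismatch is re-checked through
// the to-space copy recorded in the lock word. The state constants mirror the static_asserts
// above; the forwarding shift is passed in rather than hard-coded.
static inline bool SketchRefersTo(uint32_t referent,
                                  uint32_t other,
                                  uint32_t lock_word,
                                  uint32_t forwarding_address_shift) {
  if (referent == other) return true;
  if (referent == 0u) return false;           // A null referent never forwards.
  if ((lock_word >> 30) != 3u) return false;  // Lock word state is not "forwarding address".
  uint32_t forwarded = lock_word << forwarding_address_shift;
  return forwarded == other;
}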
3569 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3570   LocationSummary* locations =
3571       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3572   locations->SetOut(Location::RequiresRegister());
3573 }
3574 
3575 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3576   X86_64Assembler* assembler = GetAssembler();
3577   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3578   Address address = Address::Absolute
3579       (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3580   NearLabel done;
3581   __ gs()->movl(out, address);
3582   __ testl(out, out);
3583   __ j(kEqual, &done);
3584   __ gs()->movl(address, Immediate(0));
3585   codegen_->MemoryFence();
3586   __ Bind(&done);
3587 }
3588 
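// Illustrative sketch (not part of the ART sources) of the test-and-clear semantics generated
// above: Thread.interrupted() returns the flag and clears it only when it was set.
// `self_interrupted` is a hypothetical stand-in for the per-thread field reached via GS.
static inline int32_t SketchThreadInterrupted(int32_t* self_interrupted) {
  int32_t value = *self_interrupted;
  if (value != 0) {
    *self_interrupted = 0;  // Clear the flag; the generated code follows this with a fence.
  }
  return value;
}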
3589 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3590   LocationSummary* locations =
3591       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3592   locations->SetInAt(0, Location::Any());
3593 }
3594 
3595 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3596 
3597 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3598   LocationSummary* locations =
3599       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3600   locations->SetInAt(0, Location::RegisterLocation(RAX));
3601   locations->SetInAt(1, Location::RequiresRegister());
3602   locations->SetOut(Location::SameAsFirstInput());
3603   // Intel uses edx:eax as the dividend.
3604   locations->AddTemp(Location::RegisterLocation(RDX));
3605 }
3606 
3607 static void GenerateDivideUnsigned(HInvoke* invoke,
3608                                    CodeGeneratorX86_64* codegen,
3609                                    DataType::Type data_type) {
3610   LocationSummary* locations = invoke->GetLocations();
3611   Location out = locations->Out();
3612   Location first = locations->InAt(0);
3613   Location second = locations->InAt(1);
3614   CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3615   CpuRegister second_reg = second.AsRegister<CpuRegister>();
3616 
3617   DCHECK_EQ(RAX, first.AsRegister<Register>());
3618   DCHECK_EQ(RAX, out.AsRegister<Register>());
3619   DCHECK_EQ(RDX, rdx.AsRegister());
3620 
3621   // Check whether the divisor is zero and, if so, bail out to the slow path to handle it.
3622   auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3623   codegen->AddSlowPath(slow_path);
3624 
3625   X86_64Assembler* assembler = codegen->GetAssembler();
3626   if (data_type == DataType::Type::kInt32) {
3627     __ testl(second_reg, second_reg);
3628     __ j(kEqual, slow_path->GetEntryLabel());
3629     __ xorl(rdx, rdx);
3630     __ divl(second_reg);
3631   } else {
3632     DCHECK(data_type == DataType::Type::kInt64);
3633     __ testq(second_reg, second_reg);
3634     __ j(kEqual, slow_path->GetEntryLabel());
3635     __ xorq(rdx, rdx);
3636     __ divq(second_reg);
3637   }
3638   __ Bind(slow_path->GetExitLabel());
3639 }
3640 
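// Illustrative sketch (not part of the ART sources) of the Integer.divideUnsigned semantics
// produced above: RDX is zeroed so RDX:RAX holds the zero-extended dividend, then unsigned DIV
// is used. A zero divisor is deferred to the slow path, which ends up throwing
// ArithmeticException.
static inline uint32_t SketchDivideUnsigned32(uint32_t dividend, uint32_t divisor) {
  // divisor == 0 is deliberately not handled here; the intrinsic bails out for that case.
  return dividend / divisor;
}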
3641 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3642   CreateDivideUnsignedLocations(invoke, allocator_);
3643 }
3644 
3645 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3646   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3647 }
3648 
3649 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3650   CreateDivideUnsignedLocations(invoke, allocator_);
3651 }
3652 
3653 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3654   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3655 }
3656 
3657 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3658   LocationSummary* locations =
3659       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3660   locations->SetInAt(0, Location::RegisterLocation(RAX));
3661   locations->SetInAt(1, Location::RequiresRegister());
3662   locations->SetOut(Location::RegisterLocation(RDX));
3663   locations->AddTemp(Location::RegisterLocation(RAX));
3664 }
3665 
3666 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3667   X86_64Assembler* assembler = GetAssembler();
3668   LocationSummary* locations = invoke->GetLocations();
3669 
3670   CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3671 
3672   DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3673   DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3674 
3675   __ imulq(y);
3676 }
3677 
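// Illustrative sketch (not part of the ART sources) of Math.multiplyHigh: the high 64 bits of
// the full signed 128-bit product, which single-operand IMUL leaves in RDX. Assumes a compiler
// with the __int128 extension (GCC/Clang).
static inline int64_t SketchMultiplyHigh(int64_t x, int64_t y) {
  return static_cast<int64_t>((static_cast<__int128>(x) * static_cast<__int128>(y)) >> 64);
}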
3678 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3679  public:
3680   explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3681       : IntrinsicSlowPathX86_64(invoke) {
3682   }
3683 
3684   void SetVolatile(bool is_volatile) {
3685     is_volatile_ = is_volatile;
3686   }
3687 
3688   void SetAtomic(bool is_atomic) {
3689     is_atomic_ = is_atomic;
3690   }
3691 
3692   void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3693     need_any_store_barrier_ = need_any_store_barrier;
3694   }
3695 
3696   void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3697     need_any_any_barrier_ = need_any_any_barrier;
3698   }
3699 
3700   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3701     get_and_update_op_ = get_and_update_op;
3702   }
3703 
3704   Label* GetByteArrayViewCheckLabel() {
3705     return &byte_array_view_check_label_;
3706   }
3707 
3708   Label* GetNativeByteOrderLabel() {
3709     return &native_byte_order_label_;
3710   }
3711 
3712   void EmitNativeCode(CodeGenerator* codegen) override {
3713     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3714       EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3715     }
3716     IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3717   }
3718 
3719  private:
3720   HInvoke* GetInvoke() const {
3721     return GetInstruction()->AsInvoke();
3722   }
3723 
3724   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3725     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3726   }
3727 
3728   void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3729 
3730   Label byte_array_view_check_label_;
3731   Label native_byte_order_label_;
3732 
3733   // Arguments forwarded to specific methods.
3734   bool is_volatile_;
3735   bool is_atomic_;
3736   bool need_any_store_barrier_;
3737   bool need_any_any_barrier_;
3738   GetAndUpdateOp get_and_update_op_;
3739 };
3740 
3741 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3742   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3743   X86_64Assembler* assembler = codegen->GetAssembler();
3744   LocationSummary* locations = invoke->GetLocations();
3745   DCHECK(locations->InAt(0).Equals(locations->Out()));
3746   XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3747   XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3748   XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
3749   if (invoke->GetType() == DataType::Type::kFloat32) {
3750     __ vfmadd213ss(left, right, accumulator);
3751   } else {
3752     DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3753     __ vfmadd213sd(left, right, accumulator);
3754   }
3755 }
3756 
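// Illustrative sketch (not part of the ART sources): VFMADD213SD/SS compute a * b + c with a
// single rounding step, i.e. the Math.fma contract, matching std::fma. Assumes <cmath>.
static inline double SketchFma(double a, double b, double c) {
  return std::fma(a, b, c);  // One rounding, unlike the two roundings in (a * b) + c.
}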
3757 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3758   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3759   GenerateMathFma(invoke, codegen_);
3760 }
3761 
3762 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3763   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3764     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3765   }
3766 }
3767 
3768 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3769   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3770   GenerateMathFma(invoke, codegen_);
3771 }
3772 
3773 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3774   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3775     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3776   }
3777 }
3778 
3779 // Generate subtype check without read barriers.
3780 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3781                                                     SlowPathCode* slow_path,
3782                                                     CpuRegister object,
3783                                                     CpuRegister temp,
3784                                                     Address type_address,
3785                                                     bool object_can_be_null = true) {
3786   X86_64Assembler* assembler = codegen->GetAssembler();
3787 
3788   const MemberOffset class_offset = mirror::Object::ClassOffset();
3789   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3790 
3791   NearLabel check_type_compatibility, type_matched;
3792 
3793   // If the object is null, there is no need to check the type
3794   if (object_can_be_null) {
3795     __ testl(object, object);
3796     __ j(kZero, &type_matched);
3797   }
3798 
3799   // Do not unpoison for in-memory comparison.
3800   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3801   __ movl(temp, Address(object, class_offset));
3802   __ Bind(&check_type_compatibility);
3803   __ cmpl(temp, type_address);
3804   __ j(kEqual, &type_matched);
3805   // Load the super class.
3806   __ MaybeUnpoisonHeapReference(temp);
3807   __ movl(temp, Address(temp, super_class_offset));
3808   // If the super class is null, we reached the root of the hierarchy without a match.
3809   // We let the slow path handle uncovered cases (e.g. interfaces).
3810   __ testl(temp, temp);
3811   __ j(kEqual, slow_path->GetEntryLabel());
3812   __ jmp(&check_type_compatibility);
3813   __ Bind(&type_matched);
3814 }
3815 
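// Illustrative sketch (not part of the ART sources) of the superclass-chain walk emitted above.
// `SketchClass` is a hypothetical stand-in for mirror::Class with only the super link. A
// "not found" answer is only a possible mismatch (interfaces, stale from-space pointers), which
// is why the generated code defers it to the slow path instead of failing outright.
struct SketchClass { const SketchClass* super; };
static inline bool SketchIsSubType(const SketchClass* klass, const SketchClass* type) {
  for (const SketchClass* k = klass; k != nullptr; k = k->super) {
    if (k == type) {
      return true;
    }
  }
  return false;  // Possibly a false negative; the slow path decides.
}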
3816 // Check access mode and the primitive type from VarHandle.varType.
3817 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3818 // check without read barrier, so it can have false negatives which we handle in the slow path.
3819 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3820                                                         CodeGeneratorX86_64* codegen,
3821                                                         VarHandleSlowPathX86_64* slow_path,
3822                                                         DataType::Type type) {
3823   X86_64Assembler* assembler = codegen->GetAssembler();
3824 
3825   LocationSummary* locations = invoke->GetLocations();
3826   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3827   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3828 
3829   mirror::VarHandle::AccessMode access_mode =
3830       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3831   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3832 
3833   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3834   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3835   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3836 
3837   // Check that the operation is permitted.
3838   __ testl(Address(varhandle, access_mode_bit_mask_offset),
3839            Immediate(1u << static_cast<uint32_t>(access_mode)));
3840   __ j(kZero, slow_path->GetEntryLabel());
3841 
3842   // For primitive types, we do not need a read barrier when loading the var type reference,
3843   // since we only use it to load a constant field through it. For reference types, we
3844   // deliberately avoid the read barrier, letting the slow path handle the false negatives.
3845   __ movl(temp, Address(varhandle, var_type_offset));
3846   __ MaybeUnpoisonHeapReference(temp);
3847 
3848   // Check the varType.primitiveType field against the type we're trying to use.
3849   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3850   __ j(kNotEqual, slow_path->GetEntryLabel());
3851 
3852   if (type == DataType::Type::kReference) {
3853     // Check reference arguments against the varType.
3854     // False negatives due to varType being an interface or array type
3855     // or due to the missing read barrier are handled by the slow path.
3856     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3857     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3858     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3859     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3860       HInstruction* arg = invoke->InputAt(arg_index);
3861       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3862       if (!arg->IsNullConstant()) {
3863         CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3864         Address type_addr(varhandle, var_type_offset);
3865         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3866       }
3867     }
3868   }
3869 }
3870 
3871 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3872                                               CodeGeneratorX86_64* codegen,
3873                                               VarHandleSlowPathX86_64* slow_path) {
3874   X86_64Assembler* assembler = codegen->GetAssembler();
3875 
3876   LocationSummary* locations = invoke->GetLocations();
3877   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3878 
3879   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3880 
3881   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3882   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3883   __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3884   __ j(kNotEqual, slow_path->GetEntryLabel());
3885 }
3886 
3887 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3888                                                  CodeGeneratorX86_64* codegen,
3889                                                  VarHandleSlowPathX86_64* slow_path) {
3890   VarHandleOptimizations optimizations(invoke);
3891   X86_64Assembler* assembler = codegen->GetAssembler();
3892 
3893   LocationSummary* locations = invoke->GetLocations();
3894   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3895   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3896   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3897 
3898   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3899   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3900 
3901   // Null-check the object.
3902   if (!optimizations.GetSkipObjectNullCheck()) {
3903     __ testl(object, object);
3904     __ j(kZero, slow_path->GetEntryLabel());
3905   }
3906 
3907   if (!optimizations.GetUseKnownImageVarHandle()) {
3908     // Check that the VarHandle references an instance field by checking that
3909     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3910     // type compatibility check with the source object's type, which will fail for null.
3911     __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3912     __ j(kNotEqual, slow_path->GetEntryLabel());
3913 
3914     // Check that the object has the correct type.
3915     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3916     GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3917                                             slow_path,
3918                                             object,
3919                                             temp,
3920                                             Address(varhandle, coordinate_type0_offset),
3921                                             /*object_can_be_null=*/ false);
3922   }
3923 }
3924 
3925 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3926                                          CodeGeneratorX86_64* codegen,
3927                                          VarHandleSlowPathX86_64* slow_path) {
3928   VarHandleOptimizations optimizations(invoke);
3929   X86_64Assembler* assembler = codegen->GetAssembler();
3930   LocationSummary* locations = invoke->GetLocations();
3931 
3932   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3933   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3934   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3935   DataType::Type value_type =
3936       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3937   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3938 
3939   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3940   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3941   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3942   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3943   const MemberOffset class_offset = mirror::Object::ClassOffset();
3944   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3945 
3946   // Null-check the object.
3947   if (!optimizations.GetSkipObjectNullCheck()) {
3948     __ testl(object, object);
3949     __ j(kZero, slow_path->GetEntryLabel());
3950   }
3951 
3952   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3953 
3954   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3955   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3956   // coordinateType0 shall not be null but we do not explicitly verify that.
3957   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3958   __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3959   __ j(kEqual, slow_path->GetEntryLabel());
3960 
3961   // Check object class against componentType0.
3962   //
3963   // This is an exact check and we defer other cases to the runtime. This includes
3964   // conversion to array of superclass references, which is valid but subsequently
3965   // requires all update operations to check that the value can indeed be stored.
3966   // We do not want to perform such extra checks in the intrinsified code.
3967   //
3968   // We do this check without read barrier, so there can be false negatives which we
3969   // defer to the slow path. There shall be no false negatives for array classes in the
3970   // boot image (including Object[] and primitive arrays) because they are non-movable.
3971   __ movl(temp, Address(object, class_offset.Int32Value()));
3972   __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3973   __ j(kNotEqual, slow_path->GetEntryLabel());
3974 
3975   // Check that the coordinateType0 is an array type. We do not need a read barrier
3976   // for loading constant reference fields (or chains of them) for comparison with null,
3977   // nor for finally loading a constant primitive field (primitive type) below.
3978   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3979   __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3980   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3981   __ testl(temp, temp);
3982   __ j(kZero, slow_path->GetEntryLabel());
3983 
3984   // Check that the array component type matches the primitive type.
3985   Label* slow_path_label;
3986   if (primitive_type == Primitive::kPrimNot) {
3987     slow_path_label = slow_path->GetEntryLabel();
3988   } else {
3989     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3990     // we shall check for a byte array view in the slow path.
3991     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3992     // so we cannot emit that if we're JITting without boot image.
3993     bool boot_image_available =
3994         codegen->GetCompilerOptions().IsBootImage() ||
3995         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3996     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3997     slow_path_label =
3998         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3999   }
4000   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
4001   __ j(kNotEqual, slow_path_label);
4002 
4003   // Check for array index out of bounds.
4004   __ cmpl(index, Address(object, array_length_offset.Int32Value()));
4005   __ j(kAboveEqual, slow_path->GetEntryLabel());
4006 }
4007 
4008 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4009                                               CodeGeneratorX86_64* codegen,
4010                                               VarHandleSlowPathX86_64* slow_path) {
4011   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4012   if (expected_coordinates_count == 0u) {
4013     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4014   } else if (expected_coordinates_count == 1u) {
4015     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4016   } else {
4017     DCHECK_EQ(expected_coordinates_count, 2u);
4018     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4019   }
4020 }
4021 
4022 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
4023                                                         CodeGeneratorX86_64* codegen,
4024                                                         DataType::Type type) {
4025   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4026   VarHandleOptimizations optimizations(invoke);
4027   if (optimizations.GetUseKnownImageVarHandle()) {
4028     DCHECK_NE(expected_coordinates_count, 2u);
4029     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4030       return nullptr;
4031     }
4032   }
4033 
4034   VarHandleSlowPathX86_64* slow_path =
4035       new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
4036   codegen->AddSlowPath(slow_path);
4037 
4038   if (!optimizations.GetUseKnownImageVarHandle()) {
4039     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4040   }
4041   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4042 
4043   return slow_path;
4044 }
4045 
4046 struct VarHandleTarget {
4047   Register object;  // The object holding the value to operate on.
4048   Register offset;  // The offset of the value to operate on.
4049 };
4050 
4051 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4052   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4053   LocationSummary* locations = invoke->GetLocations();
4054 
4055   VarHandleTarget target;
4056   // The temporary allocated for loading the offset.
4057   target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
4058   // The reference to the object that holds the value to operate on.
4059   target.object = (expected_coordinates_count == 0u)
4060       ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
4061       : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
4062   return target;
4063 }
4064 
4065 static void GenerateVarHandleTarget(HInvoke* invoke,
4066                                     const VarHandleTarget& target,
4067                                     CodeGeneratorX86_64* codegen) {
4068   LocationSummary* locations = invoke->GetLocations();
4069   X86_64Assembler* assembler = codegen->GetAssembler();
4070   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4071 
4072   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4073 
4074   if (expected_coordinates_count <= 1u) {
4075     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4076       ScopedObjectAccess soa(Thread::Current());
4077       ArtField* target_field = GetImageVarHandleField(invoke);
4078       if (expected_coordinates_count == 0u) {
4079         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4080         __ movl(CpuRegister(target.object),
4081                 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
4082         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4083           codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
4084         } else {
4085           codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
4086                                             declaring_class->GetDexTypeIndex());
4087         }
4088       }
4089       __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
4090     } else {
4091       // For static fields, we need to fill the `target.object` with the declaring class,
4092       // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
4093       // we do not need the declaring class, so we can clobber the `ArtField*` once we have
4094       // loaded the `target.offset`; thus `target.offset` itself holds the `ArtField*`.
4095       CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
4096 
4097       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4098       const MemberOffset offset_offset = ArtField::OffsetOffset();
4099 
4100       // Load the ArtField*, the offset and, if needed, declaring class.
4101       __ movq(field, Address(varhandle, art_field_offset));
4102       __ movl(CpuRegister(target.offset), Address(field, offset_offset));
4103       if (expected_coordinates_count == 0u) {
4104         InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
4105         instr_codegen->GenerateGcRootFieldLoad(invoke,
4106                                                Location::RegisterLocation(target.object),
4107                                                Address(field, ArtField::DeclaringClassOffset()),
4108                                                /*fixup_label=*/nullptr,
4109                                                codegen->GetCompilerReadBarrierOption());
4110       }
4111     }
4112   } else {
4113     DCHECK_EQ(expected_coordinates_count, 2u);
4114 
4115     DataType::Type value_type =
4116         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4117     ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
4118     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4119     CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4120 
4121     // The effect of LEA is `target.offset = index * scale + data_offset`.
4122     __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
4123   }
4124 }
4125 
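// Illustrative sketch (not part of the ART sources) of the addressing produced above: the value
// lives at `target.object + target.offset`, where the offset is either the field offset taken
// from the ArtField or `data_offset + index * element_size` for array coordinates.
static inline uint64_t SketchVarHandleTargetOffset(bool is_array,
                                                   uint32_t field_offset,
                                                   uint32_t data_offset,
                                                   uint32_t index,
                                                   uint32_t element_size) {
  return is_array ? data_offset + static_cast<uint64_t>(index) * element_size
                  : static_cast<uint64_t>(field_offset);
}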
4126 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4127   // The only supported read barrier implementation is the Baker-style read barriers.
4128   if (codegen->EmitNonBakerReadBarrier()) {
4129     return false;
4130   }
4131 
4132   VarHandleOptimizations optimizations(invoke);
4133   if (optimizations.GetDoNotIntrinsify()) {
4134     return false;
4135   }
4136 
4137   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4138   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
4139   return true;
4140 }
4141 
4142 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4143   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4144   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4145   LocationSummary* locations = new (allocator) LocationSummary(
4146       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4147 
4148   locations->SetInAt(0, Location::RequiresRegister());
4149   // Require coordinates in registers. These are the object holding the value
4150   // to operate on (except for static fields) and index (for arrays and views).
4151   for (size_t i = 0; i != expected_coordinates_count; ++i) {
4152     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4153   }
4154 
4155   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4156   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4157   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4158     HInstruction* arg = invoke->InputAt(arg_index);
4159     if (DataType::IsFloatingPointType(arg->GetType())) {
4160       locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
4161     } else {
4162       locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
4163     }
4164   }
4165 
4166   // Add a temporary for offset.
4167   locations->AddTemp(Location::RequiresRegister());
4168 
4169   if (expected_coordinates_count == 0u) {
4170     // Add a temporary to hold the declaring class.
4171     locations->AddTemp(Location::RequiresRegister());
4172   }
4173 
4174   return locations;
4175 }
4176 
4177 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4178   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4179     return;
4180   }
4181 
4182   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4183   if (DataType::IsFloatingPointType(invoke->GetType())) {
4184     locations->SetOut(Location::RequiresFpuRegister());
4185   } else {
4186     locations->SetOut(Location::RequiresRegister());
4187   }
4188 }
4189 
4190 static void GenerateVarHandleGet(HInvoke* invoke,
4191                                  CodeGeneratorX86_64* codegen,
4192                                  bool byte_swap = false) {
4193   DataType::Type type = invoke->GetType();
4194   DCHECK_NE(type, DataType::Type::kVoid);
4195 
4196   LocationSummary* locations = invoke->GetLocations();
4197   X86_64Assembler* assembler = codegen->GetAssembler();
4198 
4199   VarHandleTarget target = GetVarHandleTarget(invoke);
4200   VarHandleSlowPathX86_64* slow_path = nullptr;
4201   if (!byte_swap) {
4202     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4203     GenerateVarHandleTarget(invoke, target, codegen);
4204     if (slow_path != nullptr) {
4205       __ Bind(slow_path->GetNativeByteOrderLabel());
4206     }
4207   }
4208 
4209   // Load the value from the field
4210   Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4211   Location out = locations->Out();
4212 
4213   if (type == DataType::Type::kReference) {
4214     if (codegen->EmitReadBarrier()) {
4215       DCHECK(kUseBakerReadBarrier);
4216       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4217           invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4218     } else {
4219       __ movl(out.AsRegister<CpuRegister>(), src);
4220       __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4221     }
4222     DCHECK(!byte_swap);
4223   } else {
4224     codegen->LoadFromMemoryNoReference(type, out, src);
4225     if (byte_swap) {
4226       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4227       codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4228     }
4229   }
4230 
4231   if (slow_path != nullptr) {
4232     DCHECK(!byte_swap);
4233     __ Bind(slow_path->GetExitLabel());
4234   }
4235 }
4236 
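// Illustrative sketch (not part of the ART sources) of the fast-path load emitted above,
// ignoring read barriers and byte swaps. On x86-64 an ordinary load already provides acquire
// ordering, which is why the Acquire/Opaque/Volatile getters below reuse this same code path.
// Assumes <cstring> for memcpy.
static inline int32_t SketchVarHandleGetInt(const uint8_t* object, uint64_t offset) {
  int32_t value;
  std::memcpy(&value, object + offset, sizeof(value));
  return value;
}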
4237 void IntrinsicLocationsBuilderX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4238   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4239   LocationSummary* locations = new (allocator)
4240       LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
4241 
4242   InvokeDexCallingConventionVisitorX86_64 calling_convention;
4243   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
4244 
4245   uint32_t number_of_args = invoke->GetNumberOfArguments();
4246 
4247   // Accommodate the LocationSummary for the underlying invoke-* call.
4248   for (uint32_t i = 1; i < number_of_args; ++i) {
4249     locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
4250   }
4251 
4252   // Pass the MethodHandle object as the last parameter: the accessor implementations rely on it.
4253   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kReference);
4254   Location receiver_mh_loc = calling_convention.GetNextLocation(DataType::Type::kReference);
4255   locations->SetInAt(0, receiver_mh_loc);
4256 
4257   // The last input is MethodType object corresponding to the call-site.
4258   locations->SetInAt(number_of_args, Location::RequiresRegister());
4259 
4260   locations->AddTemp(Location::RequiresRegister());
4261   // Hidden arg for invoke-interface.
4262   locations->AddTemp(Location::RegisterLocation(RAX));
4263 
4264   if (!receiver_mh_loc.IsRegister()) {
4265     locations->AddTemp(Location::RequiresRegister());
4266   }
4267 }
4268 
4269 void IntrinsicCodeGeneratorX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4270   LocationSummary* locations = invoke->GetLocations();
4271   X86_64Assembler* assembler = codegen_->GetAssembler();
4272 
4273   Location receiver_mh_loc = locations->InAt(0);
4274   CpuRegister method_handle = receiver_mh_loc.IsRegister()
4275       ? receiver_mh_loc.AsRegister<CpuRegister>()
4276       : locations->GetTemp(2).AsRegister<CpuRegister>();
4277 
4278   if (!receiver_mh_loc.IsRegister()) {
4279     DCHECK(receiver_mh_loc.IsStackSlot());
4280     __ movl(method_handle, Address(CpuRegister(RSP), receiver_mh_loc.GetStackIndex()));
4281   }
4282 
4283   SlowPathCode* slow_path =
4284       new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathX86_64(invoke, method_handle);
4285   codegen_->AddSlowPath(slow_path);
4286 
4287   CpuRegister call_site_type =
4288       locations->InAt(invoke->GetNumberOfArguments()).AsRegister<CpuRegister>();
4289 
4290   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4291 
4292   // Call site should match with MethodHandle's type.
4293   if (kPoisonHeapReferences) {
4294     // call_site_type should be left intact as it 1) might be in a callee-saved register and
4295     // 2) is known to the GC to contain a reference.
4296     __ movl(temp, call_site_type);
4297     __ PoisonHeapReference(temp);
4298     __ cmpl(temp, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4299     __ j(kNotEqual, slow_path->GetEntryLabel());
4300   } else {
4301     __ cmpl(call_site_type, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4302     __ j(kNotEqual, slow_path->GetEntryLabel());
4303   }
4304 
4305   CpuRegister method = CpuRegister(kMethodRegisterArgument);
4306   __ movq(method, Address(method_handle, mirror::MethodHandle::ArtFieldOrMethodOffset()));
4307 
4308   Label execute_target_method;
4309   Label method_dispatch;
4310   Label static_dispatch;
4311 
4312   Address method_handle_kind = Address(method_handle, mirror::MethodHandle::HandleKindOffset());
4313 
4314   __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::kFirstAccessorKind));
4315   __ j(kLess, &method_dispatch);
4316   __ movq(method, Address(method_handle, mirror::MethodHandleImpl::TargetOffset()));
4317   __ Jump(&execute_target_method);
4318 
4319   __ Bind(&method_dispatch);
4320   if (invoke->AsInvokePolymorphic()->CanTargetInstanceMethod()) {
4321     CpuRegister receiver = locations->InAt(1).AsRegister<CpuRegister>();
4322 
4323     // The receiver must not be null for any of the following cases.
4324     __ testl(receiver, receiver);
4325     __ j(kEqual, slow_path->GetEntryLabel());
4326 
4327     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeDirect));
4328     // No dispatch is needed for invoke-direct.
4329     __ j(kEqual, &execute_target_method);
4330 
4331     Label non_virtual_dispatch;
4332     // Handle invoke-virtual case.
4333     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeVirtual));
4334     __ j(kNotEqual, &non_virtual_dispatch);
4335 
4336     // Skip virtual dispatch if `method` is private.
4337     __ testl(Address(method, ArtMethod::AccessFlagsOffset()), Immediate(kAccPrivate));
4338     __ j(kNotZero, &execute_target_method);
4339 
4340     __ movl(temp, Address(method, ArtMethod::DeclaringClassOffset()));
4341     __ cmpl(temp, Address(receiver, mirror::Object::ClassOffset()));
4342     // If method is defined in the receiver's class, execute it as it is.
4343     __ j(kEqual, &execute_target_method);
4344 
4345     // MethodIndex is uint16_t.
4346     __ movzxw(temp, Address(method, ArtMethod::MethodIndexOffset()));
4347 
4348     constexpr uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4349     // Re-using method register for receiver class.
4350     __ movl(method, Address(receiver, class_offset));
4351     __ MaybeUnpoisonHeapReference(method);
4352 
4353     constexpr uint32_t vtable_offset =
4354         mirror::Class::EmbeddedVTableOffset(art::PointerSize::k64).Int32Value();
4355     __ movq(method, Address(method, temp, TIMES_8, vtable_offset));
4356     __ Jump(&execute_target_method);
4357 
4358     __ Bind(&non_virtual_dispatch);
4359     __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeInterface));
4360     __ j(kNotEqual, &static_dispatch);
4361 
4362     __ movl(temp, Address(method, ArtMethod::AccessFlagsOffset()));
4363 
4364     __ testl(temp, Immediate(kAccPrivate));
4365     __ j(kNotZero, &execute_target_method);
4366 
4367     CpuRegister hidden_arg = locations->GetTemp(1).AsRegister<CpuRegister>();
4368     // Set the hidden argument.
4369     DCHECK_EQ(RAX, hidden_arg.AsRegister());
4370     __ movq(hidden_arg, method);
4371 
4372     Label get_imt_index_from_method_index;
4373     Label do_imt_dispatch;
4374 
4375     // Get IMT index.
4376     // Not doing default conflict check as IMT index is set for all method which have
4377     // No default-conflict check is done, as the IMT index is set for all methods that have
4378     // the kAccAbstract bit.
4379     __ j(kZero, &get_imt_index_from_method_index);
4380 
4381     // imt_index_ is uint16_t
4382     __ movzxw(temp, Address(method, ArtMethod::ImtIndexOffset()));
4383     __ Jump(&do_imt_dispatch);
4384 
4385     // Default method, do method->GetMethodIndex() & (ImTable::kSizeTruncToPowerOfTwo - 1);
4386     __ Bind(&get_imt_index_from_method_index);
4387     __ movl(temp, Address(method, ArtMethod::MethodIndexOffset()));
4388     __ andl(temp, Immediate(ImTable::kSizeTruncToPowerOfTwo - 1));
4389 
4390     __ Bind(&do_imt_dispatch);
4391     // Re-using `method` to store receiver class and ImTableEntry.
4392     __ movl(method, Address(receiver, mirror::Object::ClassOffset()));
4393     __ MaybeUnpoisonHeapReference(method);
4394 
4395     __ movq(method, Address(method, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4396     // method = receiver->GetClass()->embedded_imtable_->Get(method_offset);
4397     __ movq(method, Address(method, temp, TIMES_8, /* disp= */ 0));
4398 
4399     __ Jump(&execute_target_method);
4400   }
4401   __ Bind(&static_dispatch);
4402   __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeStatic));
4403   __ j(kNotEqual, slow_path->GetEntryLabel());
4404   // MH's kind is invoke-static. The method can be called directly, hence fall-through.
4405 
4406   __ Bind(&execute_target_method);
4407   __ call(Address(
4408       method,
4409       ArtMethod::EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).SizeValue()));
4410   codegen_->RecordPcInfo(invoke, slow_path);
4411   __ Bind(slow_path->GetExitLabel());
4412 }
4413 
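// Illustrative sketch (not part of the ART sources) summarizing the dispatch choices made above
// for MethodHandle.invokeExact. The enumerators are simplified stand-ins for
// mirror::MethodHandle::Kind; anything not matched here falls back to the slow path.
enum class SketchMhKind { kAccessor, kInvokeDirect, kInvokeStatic, kInvokeVirtual, kInvokeInterface };
static inline const char* SketchMhDispatch(SketchMhKind kind, bool method_is_private) {
  switch (kind) {
    case SketchMhKind::kAccessor:      return "load the target from MethodHandleImpl and call it";
    case SketchMhKind::kInvokeDirect:
    case SketchMhKind::kInvokeStatic:  return "call the resolved ArtMethod directly";
    case SketchMhKind::kInvokeVirtual:
      return method_is_private ? "call directly" : "vtable lookup (skipped when the method is "
                                                   "declared in the receiver's class)";
    case SketchMhKind::kInvokeInterface:
      return method_is_private ? "call directly" : "IMT lookup (interface method as hidden arg)";
  }
  return "bail out to the slow path";
}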
4414 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4415   CreateVarHandleGetLocations(invoke, codegen_);
4416 }
4417 
4418 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4419   GenerateVarHandleGet(invoke, codegen_);
4420 }
4421 
4422 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4423   CreateVarHandleGetLocations(invoke, codegen_);
4424 }
4425 
4426 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4427   // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
4428   GenerateVarHandleGet(invoke, codegen_);
4429 }
4430 
4431 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4432   CreateVarHandleGetLocations(invoke, codegen_);
4433 }
4434 
4435 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4436   // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4437   GenerateVarHandleGet(invoke, codegen_);
4438 }
4439 
4440 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4441   CreateVarHandleGetLocations(invoke, codegen_);
4442 }
4443 
4444 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4445   // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4446   GenerateVarHandleGet(invoke, codegen_);
4447 }
4448 
4449 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4450   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4451     return;
4452   }
4453 
4454   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4455 
4456   // Extra temporary is used for card in MarkGCCard and to move 64-bit constants to memory.
4457   locations->AddTemp(Location::RequiresRegister());
4458 }
4459 
4460 static void GenerateVarHandleSet(HInvoke* invoke,
4461                                  CodeGeneratorX86_64* codegen,
4462                                  bool is_volatile,
4463                                  bool is_atomic,
4464                                  bool byte_swap = false) {
4465   X86_64Assembler* assembler = codegen->GetAssembler();
4466 
4467   LocationSummary* locations = invoke->GetLocations();
4468   const uint32_t last_temp_index = locations->GetTempCount() - 1;
4469 
4470   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4471   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4472 
4473   VarHandleTarget target = GetVarHandleTarget(invoke);
4474   VarHandleSlowPathX86_64* slow_path = nullptr;
4475   if (!byte_swap) {
4476     slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4477     GenerateVarHandleTarget(invoke, target, codegen);
4478     if (slow_path != nullptr) {
4479       slow_path->SetVolatile(is_volatile);
4480       slow_path->SetAtomic(is_atomic);
4481       __ Bind(slow_path->GetNativeByteOrderLabel());
4482     }
4483   }
4484 
4485   switch (invoke->GetIntrinsic()) {
4486     case Intrinsics::kVarHandleSetRelease:
4487       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4488       break;
4489     case Intrinsics::kVarHandleSetVolatile:
4490       // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4491       break;
4492     default:
4493       // Other intrinsics don't need a barrier.
4494       break;
4495   }
4496 
4497   Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4498 
4499   // Store the value to the field.
4500   codegen->GetInstructionCodegen()->HandleFieldSet(
4501       invoke,
4502       value_index,
4503       last_temp_index,
4504       value_type,
4505       dst,
4506       CpuRegister(target.object),
4507       is_volatile,
4508       is_atomic,
4509       /*value_can_be_null=*/true,
4510       byte_swap,
4511       // Value can be null, and this write barrier is not being relied on for other sets.
4512       value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4513                                                  WriteBarrierKind::kDontEmit);
4514 
4515   // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4516 
4517   if (slow_path != nullptr) {
4518     DCHECK(!byte_swap);
4519     __ Bind(slow_path->GetExitLabel());
4520   }
4521 }
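
// For orientation, the set variants wired up below map roughly to the following Java-level
// calls (`VH` is a hypothetical VarHandle for an int field; sketch only):
//   VH.set(obj, 42);          // plain store
//   VH.setOpaque(obj, 42);    // atomic store, no extra ordering needed on x86-64
//   VH.setRelease(obj, 42);   // kAnyStore barrier emitted before the store
//   VH.setVolatile(obj, 42);  // kAnyStore before and kAnyAny after, both via HandleFieldSet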
4522 
4523 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4524   CreateVarHandleSetLocations(invoke, codegen_);
4525 }
4526 
4527 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4528   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4529 }
4530 
4531 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4532   CreateVarHandleSetLocations(invoke, codegen_);
4533 }
4534 
4535 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4536   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4537 }
4538 
4539 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4540   CreateVarHandleSetLocations(invoke, codegen_);
4541 }
4542 
4543 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4544   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4545 }
4546 
4547 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4548   CreateVarHandleSetLocations(invoke, codegen_);
4549 }
4550 
4551 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4552   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4553 }
4554 
4555 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4556                                                             CodeGeneratorX86_64* codegen) {
4557   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4558     return;
4559   }
4560 
4561   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4562   uint32_t expected_value_index = number_of_arguments - 2;
4563   uint32_t new_value_index = number_of_arguments - 1;
4564   DataType::Type return_type = invoke->GetType();
4565   DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4566   DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4567 
4568   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4569 
4570   if (DataType::IsFloatingPointType(return_type)) {
4571     locations->SetOut(Location::RequiresFpuRegister());
4572   } else {
4573     // Take advantage of the fact that CMPXCHG writes result to RAX.
4574     locations->SetOut(Location::RegisterLocation(RAX));
4575   }
4576 
4577   if (DataType::IsFloatingPointType(expected_type)) {
4578     // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4579     locations->AddTemp(Location::RegisterLocation(RAX));
4580     // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4581     locations->AddTemp(Location::RequiresRegister());
4582   } else {
4583     // Ensure that expected value is in RAX, as required by CMPXCHG.
4584     locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4585     locations->SetInAt(new_value_index, Location::RequiresRegister());
4586     if (expected_type == DataType::Type::kReference) {
4587       // Need two temporaries for MarkGCCard.
4588       locations->AddRegisterTemps(2);
4589       if (codegen->EmitReadBarrier()) {
4590         // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4591         DCHECK(kUseBakerReadBarrier);
4592         locations->AddTemp(Location::RequiresRegister());
4593       }
4594     }
4595     // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4596     DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4597   }
4598 }
4599 
4600 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4601                                                      CodeGeneratorX86_64* codegen,
4602                                                      bool is_cmpxchg,
4603                                                      bool byte_swap = false) {
4604   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4605 
4606   X86_64Assembler* assembler = codegen->GetAssembler();
4607   LocationSummary* locations = invoke->GetLocations();
4608 
4609   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4610   uint32_t expected_value_index = number_of_arguments - 2;
4611   uint32_t new_value_index = number_of_arguments - 1;
4612   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4613 
4614   VarHandleSlowPathX86_64* slow_path = nullptr;
4615   VarHandleTarget target = GetVarHandleTarget(invoke);
4616   if (!byte_swap) {
4617     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4618     GenerateVarHandleTarget(invoke, target, codegen);
4619     if (slow_path != nullptr) {
4620       __ Bind(slow_path->GetNativeByteOrderLabel());
4621     }
4622   }
4623 
4624   uint32_t temp_count = locations->GetTempCount();
4625   GenCompareAndSetOrExchange(codegen,
4626                              invoke,
4627                              type,
4628                              CpuRegister(target.object),
4629                              CpuRegister(target.offset),
4630                              /*temp1_index=*/ temp_count - 1,
4631                              /*temp2_index=*/ temp_count - 2,
4632                              /*temp3_index=*/ temp_count - 3,
4633                              locations->InAt(new_value_index),
4634                              locations->InAt(expected_value_index),
4635                              locations->Out(),
4636                              is_cmpxchg,
4637                              byte_swap);
4638 
4639   // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4640   // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4641 
4642   if (slow_path != nullptr) {
4643     DCHECK(!byte_swap);
4644     __ Bind(slow_path->GetExitLabel());
4645   }
4646 }
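
// A minimal sketch of the core sequence generated above for a 32-bit field, assuming the
// expected value is already in EAX and the new value in a register `new_val` (names are
// illustrative, Intel operand order):
//   lock cmpxchg [obj + offset], new_val   ; ZF=1 and the field is updated iff field == EAX
//   ; compareAndSet*:      the boolean result is then materialized from ZF
//   ; compareAndExchange*: the old field value is left in EAX as the result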
4647 
4648 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4649   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4650 }
4651 
4652 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4653   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4654 }
4655 
4656 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4657   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4658 }
4659 
4660 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4661   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4662 }
4663 
4664 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4665   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4666 }
4667 
4668 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4669   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4670 }
4671 
4672 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4673   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4674 }
4675 
4676 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4677   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4678 }
4679 
4680 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4681   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4682 }
4683 
4684 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4685   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4686 }
4687 
4688 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4689   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4690 }
4691 
4692 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4693   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4694 }
4695 
4696 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4697   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4698 }
4699 
4700 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4701   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4702 }
4703 
4704 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4705   CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4706 }
4707 
4708 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4709   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4710 }
4711 
4712 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4713   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4714     return;
4715   }
4716 
4717   // Get the type from the shorty as the invokes may not return a value.
4718   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4719   uint32_t new_value_index = number_of_arguments - 1;
4720   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4721   DataType::Type return_type = invoke->GetType();
4722   const bool is_void = return_type == DataType::Type::kVoid;
4723   DCHECK_IMPLIES(!is_void, return_type == value_type);
4724 
4725   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4726 
4727   if (DataType::IsFloatingPointType(value_type)) {
4728     // Only set the `out` register if it's needed. In the void case we don't use `out`.
4729     if (!is_void) {
4730       locations->SetOut(Location::RequiresFpuRegister());
4731     }
4732     // A temporary is needed to load the new floating-point value into a register for XCHG.
4733     locations->AddTemp(Location::RequiresRegister());
4734   } else {
4735     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4736     if (value_type == DataType::Type::kReference) {
4737       // Need two temporaries for MarkGCCard.
4738       locations->AddRegisterTemps(2);
4739       if (codegen->EmitReadBarrier()) {
4740         // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4741         DCHECK(kUseBakerReadBarrier);
4742         locations->AddTemp(Location::RequiresRegister());
4743       }
4744     }
4745     // Only set the `out` register if it's needed. In the void case we can still use RAX in the
4746     // same manner; it is just marked as a temp register instead of the output.
4747     if (is_void) {
4748       locations->AddTemp(Location::RegisterLocation(RAX));
4749     } else {
4750       // Use the same register for both the new value and output to take advantage of XCHG.
4751       // It doesn't have to be RAX, but we need to choose some register to make sure it's the same.
4752       locations->SetOut(Location::RegisterLocation(RAX));
4753     }
4754   }
4755 }
4756 
4757 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4758                                        CodeGeneratorX86_64* codegen,
4759                                        Location value,
4760                                        DataType::Type type,
4761                                        Address field_addr,
4762                                        CpuRegister ref,
4763                                        bool byte_swap) {
4764   X86_64Assembler* assembler = codegen->GetAssembler();
4765   LocationSummary* locations = invoke->GetLocations();
4766   Location out = locations->Out();
4767   uint32_t temp_count = locations->GetTempCount();
4768   DataType::Type return_type = invoke->GetType();
4769   const bool is_void = return_type == DataType::Type::kVoid;
4770   DCHECK_IMPLIES(!is_void, return_type == type);
4771 
4772   if (DataType::IsFloatingPointType(type)) {
4773     // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4774     // exchange it with the field, and move the old value into the output FP register.
4775     Location temp = locations->GetTemp(temp_count - 1);
4776     codegen->Move(temp, value);
4777     bool is64bit = (type == DataType::Type::kFloat64);
4778     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4779     if (byte_swap) {
4780       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4781     }
4782     if (is64bit) {
4783       __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4784     } else {
4785       __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4786     }
4787     if (byte_swap) {
4788       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4789     }
4790     if (!is_void) {
4791       MoveIntToFP(
4792           out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit, assembler);
4793     }
4794   } else if (type == DataType::Type::kReference) {
4795     // `getAndSet` for references: load reference and atomically exchange it with the field.
4796     // Output register is the same as the one holding new value, so no need to move the result.
4797     DCHECK(!byte_swap);
4798 
4799     // In the void case, we have an extra temp register, which is used to signal the register
4800     // allocator that we are clobbering RAX.
4801     const uint32_t extra_temp = is_void ? 1u : 0u;
4802     DCHECK_IMPLIES(is_void,
4803                    locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4804 
4805     CpuRegister temp1 = locations->GetTemp(temp_count - extra_temp - 1u).AsRegister<CpuRegister>();
4806     CpuRegister temp2 = locations->GetTemp(temp_count - extra_temp - 2u).AsRegister<CpuRegister>();
4807     CpuRegister valreg = value.AsRegister<CpuRegister>();
4808 
4809     if (codegen->EmitBakerReadBarrier()) {
4810       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4811           invoke,
4812           locations->GetTemp(temp_count - extra_temp - 3u),
4813           ref,
4814           field_addr,
4815           /*needs_null_check=*/false,
4816           /*always_update_field=*/true,
4817           &temp1,
4818           &temp2);
4819     }
4820     codegen->MarkGCCard(temp1, temp2, ref);
4821 
4822     DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4823     if (kPoisonHeapReferences) {
4824       // Use a temp to avoid poisoning the base of the field address, which might happen if `valreg` is
4825       // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4826       __ movl(temp1, valreg);
4827       __ PoisonHeapReference(temp1);
4828       __ xchgl(temp1, field_addr);
4829       if (!is_void) {
4830         __ UnpoisonHeapReference(temp1);
4831         __ movl(valreg, temp1);
4832       }
4833     } else {
4834       __ xchgl(valreg, field_addr);
4835     }
4836   } else {
4837     // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4838     // register is the same as the one holding the new value. Do sign extend / zero extend as needed.
4839     if (byte_swap) {
4840       codegen->GetInstructionCodegen()->Bswap(value, type);
4841     }
4842     CpuRegister valreg = value.AsRegister<CpuRegister>();
4843     DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4844     switch (type) {
4845       case DataType::Type::kBool:
4846       case DataType::Type::kUint8:
4847         __ xchgb(valreg, field_addr);
4848         if (!is_void) {
4849           __ movzxb(valreg, valreg);
4850         }
4851         break;
4852       case DataType::Type::kInt8:
4853         __ xchgb(valreg, field_addr);
4854         if (!is_void) {
4855           __ movsxb(valreg, valreg);
4856         }
4857         break;
4858       case DataType::Type::kUint16:
4859         __ xchgw(valreg, field_addr);
4860         if (!is_void) {
4861           __ movzxw(valreg, valreg);
4862         }
4863         break;
4864       case DataType::Type::kInt16:
4865         __ xchgw(valreg, field_addr);
4866         if (!is_void) {
4867           __ movsxw(valreg, valreg);
4868         }
4869         break;
4870       case DataType::Type::kInt32:
4871       case DataType::Type::kUint32:
4872         __ xchgl(valreg, field_addr);
4873         break;
4874       case DataType::Type::kInt64:
4875       case DataType::Type::kUint64:
4876         __ xchgq(valreg, field_addr);
4877         break;
4878       default:
4879         LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4880         UNREACHABLE();
4881     }
4882     if (byte_swap) {
4883       codegen->GetInstructionCodegen()->Bswap(value, type);
4884     }
4885   }
4886 }
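
// For the common integral case above the emitted core is a single implicitly-locked exchange,
// e.g. for an int field (illustrative operands, Intel order):
//   xchg [obj + offset], eax   ; the old field value lands in EAX, which is also the output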
4887 
4888 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4889   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4890     return;
4891   }
4892 
4893   // Get the type from the shorty as the invokes may not return a value.
4894   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4895   uint32_t new_value_index = number_of_arguments - 1;
4896   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4897   DataType::Type return_type = invoke->GetType();
4898   const bool is_void = return_type == DataType::Type::kVoid;
4899   DCHECK_IMPLIES(!is_void, return_type == value_type);
4900 
4901   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4902 
4903   DCHECK_NE(DataType::Type::kReference, value_type);
4904   DCHECK(!DataType::IsFloatingPointType(value_type));
4905   // A temporary to compute the bitwise operation on the old and the new values.
4906   locations->AddTemp(Location::RequiresRegister());
4907   // We need the value to be either in a register or a 32-bit constant (as there are no arithmetic
4908   // instructions that accept a 64-bit immediate on x86_64).
4909   locations->SetInAt(new_value_index,
4910                      DataType::Is64BitType(value_type) ?
4911                          Location::RequiresRegister() :
4912                          Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4913   if (is_void) {
4914     // RAX is used as a temporary even when we are not outputting it, so reserve it. This has to
4915     // be requested before the other temporary since there's a variable number of temp registers
4916     // and the other temp register is expected to be the last one.
4917     locations->AddTemp(Location::RegisterLocation(RAX));
4918   } else {
4919     // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4920     locations->SetOut(Location::RegisterLocation(RAX));
4921   }
4922 }
4923 
4924 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4925                                       CodeGeneratorX86_64* codegen,
4926                                       Location value,
4927                                       DataType::Type type,
4928                                       Address field_addr,
4929                                       GetAndUpdateOp get_and_update_op,
4930                                       bool byte_swap) {
4931   X86_64Assembler* assembler = codegen->GetAssembler();
4932   LocationSummary* locations = invoke->GetLocations();
4933   // In the void case, we have an extra temp register, which is used to signal the register
4934   // allocator that we are clobbering RAX.
4935   const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4936   const uint32_t extra_temp = is_void ? 1u : 0u;
4937   const uint32_t temp_count = locations->GetTempCount();
4938   DCHECK_IMPLIES(is_void,
4939                  locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4940   Location temp_loc = locations->GetTemp(temp_count - extra_temp - 1u);
4941   Location rax_loc = Location::RegisterLocation(RAX);
4942   DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
4943   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4944   bool is64Bit = DataType::Is64BitType(type);
4945 
4946   NearLabel retry;
4947   __ Bind(&retry);
4948 
4949   // Load field value into RAX and copy it into a temporary register for the operation.
4950   codegen->LoadFromMemoryNoReference(type, rax_loc, field_addr);
4951   codegen->Move(temp_loc, rax_loc);
4952   if (byte_swap) {
4953     // Byte swap the temporary, since we need to perform operation in native endianness.
4954     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4955   }
4956 
4957   DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4958   int32_t const_value = value.IsConstant()
4959       ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4960       : 0;
4961 
4962   // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4963   switch (get_and_update_op) {
4964     case GetAndUpdateOp::kAdd:
4965       DCHECK(byte_swap);  // The non-byte-swapping path should use a faster XADD instruction.
4966       if (is64Bit) {
4967         __ addq(temp, value.AsRegister<CpuRegister>());
4968       } else if (value.IsConstant()) {
4969         __ addl(temp, Immediate(const_value));
4970       } else {
4971         __ addl(temp, value.AsRegister<CpuRegister>());
4972       }
4973       break;
4974     case GetAndUpdateOp::kBitwiseAnd:
4975       if (is64Bit) {
4976         __ andq(temp, value.AsRegister<CpuRegister>());
4977       } else if (value.IsConstant()) {
4978         __ andl(temp, Immediate(const_value));
4979       } else {
4980         __ andl(temp, value.AsRegister<CpuRegister>());
4981       }
4982       break;
4983     case GetAndUpdateOp::kBitwiseOr:
4984       if (is64Bit) {
4985         __ orq(temp, value.AsRegister<CpuRegister>());
4986       } else if (value.IsConstant()) {
4987         __ orl(temp, Immediate(const_value));
4988       } else {
4989         __ orl(temp, value.AsRegister<CpuRegister>());
4990       }
4991       break;
4992     case GetAndUpdateOp::kBitwiseXor:
4993       if (is64Bit) {
4994         __ xorq(temp, value.AsRegister<CpuRegister>());
4995       } else if (value.IsConstant()) {
4996         __ xorl(temp, Immediate(const_value));
4997       } else {
4998         __ xorl(temp, value.AsRegister<CpuRegister>());
4999       }
5000       break;
5001     default:
5002       LOG(FATAL) <<  "unexpected operation";
5003       UNREACHABLE();
5004   }
5005 
5006   if (byte_swap) {
5007     // RAX still contains the original value, but we need to byte swap the temporary back.
5008     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
5009   }
5010 
5011   switch (type) {
5012     case DataType::Type::kBool:
5013     case DataType::Type::kUint8:
5014     case DataType::Type::kInt8:
5015       __ LockCmpxchgb(field_addr, temp);
5016       break;
5017     case DataType::Type::kUint16:
5018     case DataType::Type::kInt16:
5019       __ LockCmpxchgw(field_addr, temp);
5020       break;
5021     case DataType::Type::kInt32:
5022     case DataType::Type::kUint32:
5023       __ LockCmpxchgl(field_addr, temp);
5024       break;
5025     case DataType::Type::kInt64:
5026     case DataType::Type::kUint64:
5027       __ LockCmpxchgq(field_addr, temp);
5028       break;
5029     default:
5030       LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
5031       UNREACHABLE();
5032   }
5033 
5034   __ j(kNotZero, &retry);
5035 
5036   // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
5037   // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
5038   if (byte_swap) {
5039     codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
5040   }
5041 }
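
// The code above follows the usual CMPXCHG retry pattern; for a 32-bit getAndBitwiseOr it is
// roughly (illustrative operands, Intel order):
//   retry:
//     mov   eax, [obj + offset]        ; load the old value
//     mov   temp, eax
//     or    temp, new_value            ; apply the requested operation
//     lock cmpxchg [obj + offset], temp
//     jnz   retry                      ; another thread changed the field, try again
//   ; EAX holds the old field value on exit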
5042 
5043 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
5044   if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
5045     return;
5046   }
5047 
5048   // Get the type from the shorty as the invokes may not return a value.
5049   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5050   uint32_t new_value_index = number_of_arguments - 1;
5051   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5052   DataType::Type return_type = invoke->GetType();
5053   const bool is_void = return_type == DataType::Type::kVoid;
5054   DCHECK_IMPLIES(!is_void, return_type == value_type);
5055 
5056   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
5057 
5058   if (DataType::IsFloatingPointType(value_type)) {
5059     // Only set the `out` register if it's needed. In the void case we don't use `out`.
5060     if (!is_void) {
5061       locations->SetOut(Location::RequiresFpuRegister());
5062     }
5063     // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
5064     locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
5065     // CMPXCHG clobbers RAX.
5066     locations->AddTemp(Location::RegisterLocation(RAX));
5067     // An FP temporary to load the old value from the field and perform FP addition.
5068     locations->AddTemp(Location::RequiresFpuRegister());
5069     // A temporary to hold the new value for CMPXCHG.
5070     locations->AddTemp(Location::RequiresRegister());
5071   } else {
5072     DCHECK_NE(value_type, DataType::Type::kReference);
5073     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
5074     if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
5075       // For byte array views with non-native endianness we need extra BSWAP operations, so we
5076       // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
5077       // case we need two temporary registers: one to hold the value instead of RAX (which may get
5078       // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
5079       // cannot distinguish this case from arrays or native-endian byte array views.
5080       locations->AddRegisterTemps(2);
5081     }
5082     // Only set the `out` register if it's needed. In the void case we can still use RAX in the
5083     // same manner; it is just marked as a temp register instead of the output.
5084     if (is_void) {
5085       locations->AddTemp(Location::RegisterLocation(RAX));
5086     } else {
5087       // Use the same register for both the new value and output to take advantage of XADD.
5088       // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
5089       // back to GenerateVarHandleGetAndOp that expects out in RAX.
5090       locations->SetOut(Location::RegisterLocation(RAX));
5091     }
5092   }
5093 }
5094 
5095 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
5096                                        CodeGeneratorX86_64* codegen,
5097                                        Location value,
5098                                        DataType::Type type,
5099                                        Address field_addr,
5100                                        bool byte_swap) {
5101   X86_64Assembler* assembler = codegen->GetAssembler();
5102   LocationSummary* locations = invoke->GetLocations();
5103   Location out = locations->Out();
5104   uint32_t temp_count = locations->GetTempCount();
5105 
5106   DataType::Type return_type = invoke->GetType();
5107   const bool is_void = return_type == DataType::Type::kVoid;
5108   DCHECK_IMPLIES(!is_void, return_type == type);
5109 
5110   if (DataType::IsFloatingPointType(type)) {
5111     if (byte_swap) {
5112       // This code should never be executed: it is the case of a byte array view (since it requires
5113       // a byte swap), and varhandles for byte array views support numeric atomic update access mode
5114       // only for int and long, but not for floating-point types (see javadoc comments for
5115       // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). But the ART varhandle
5116       // implementation for byte array views treats floating-point types as numeric types in
5117       // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
5118       // fails the access mode check at runtime prior to reaching this point. The illegal
5119       // instruction UD2 ensures that if control flow gets here by mistake, we will notice.
5120       __ ud2();
5121     }
5122 
5123     // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
5124     // into RAX for CMPXCHG, add the new FP value to the old one, move it to a non-FP temporary for
5125     // CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
5126     bool is64bit = (type == DataType::Type::kFloat64);
5127     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
5128     XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
5129     Location rax_loc = Location::RegisterLocation(RAX);
5130     Location temp_loc = locations->GetTemp(temp_count - 1);
5131     CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5132 
5133     NearLabel retry;
5134     __ Bind(&retry);
5135 
5136     // Read the value from memory into an FP register and copy it into RAX.
5137     if (is64bit) {
5138       __ movsd(fptemp, field_addr);
5139     } else {
5140       __ movss(fptemp, field_addr);
5141     }
5142     MoveFPToInt(CpuRegister(RAX), fptemp, is64bit, assembler);
5143     // If necessary, byte swap RAX and copy the byte-swapped value back into the FP register.
5144     if (byte_swap) {
5145       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5146       MoveIntToFP(fptemp, CpuRegister(RAX), is64bit, assembler);
5147     }
5148     // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
5149     if (is64bit) {
5150       __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
5151     } else {
5152       __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
5153     }
5154     MoveFPToInt(temp, fptemp, is64bit, assembler);
5155     // If necessary, byte swap RAX before CMPXCHG and the temporary before copying to FP register.
5156     if (byte_swap) {
5157       codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
5158       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5159     }
5160     if (is64bit) {
5161       __ LockCmpxchgq(field_addr, temp);
5162     } else {
5163       __ LockCmpxchgl(field_addr, temp);
5164     }
5165 
5166     __ j(kNotZero, &retry);
5167 
5168     // The old value is in RAX, byte swap if necessary.
5169     if (byte_swap) {
5170       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5171     }
5172     if (!is_void) {
5173       MoveIntToFP(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit, assembler);
5174     }
5175   } else {
5176     if (byte_swap) {
5177       // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
5178       // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
5179       // implementation that is also used for bitwise operations.
5180       // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
5181       DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
5182       // In the void case, we have an extra temp register, which is used to signal the register
5183       // allocator that we are clobbering RAX.
5184       const uint32_t extra_temp = is_void ? 1u : 0u;
5185       DCHECK_IMPLIES(is_void,
5186                      locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
5187       Location temp = locations->GetTemp(temp_count - extra_temp - 2u);
5188       codegen->Move(temp, value);
5189       GenerateVarHandleGetAndOp(
5190           invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
5191     } else {
5192       // `getAndAdd` for integral types: atomically exchange the new value with the field and add
5193       // the old value to the field. Output register is the same as the one holding the new value. Do
5194       // sign extend / zero extend as needed.
5195       CpuRegister valreg = value.AsRegister<CpuRegister>();
5196       DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
5197       switch (type) {
5198         case DataType::Type::kBool:
5199         case DataType::Type::kUint8:
5200           __ LockXaddb(field_addr, valreg);
5201           if (!is_void) {
5202             __ movzxb(valreg, valreg);
5203           }
5204           break;
5205         case DataType::Type::kInt8:
5206           __ LockXaddb(field_addr, valreg);
5207           if (!is_void) {
5208             __ movsxb(valreg, valreg);
5209           }
5210           break;
5211         case DataType::Type::kUint16:
5212           __ LockXaddw(field_addr, valreg);
5213           if (!is_void) {
5214             __ movzxw(valreg, valreg);
5215           }
5216           break;
5217         case DataType::Type::kInt16:
5218           __ LockXaddw(field_addr, valreg);
5219           if (!is_void) {
5220             __ movsxw(valreg, valreg);
5221           }
5222           break;
5223         case DataType::Type::kInt32:
5224         case DataType::Type::kUint32:
5225           __ LockXaddl(field_addr, valreg);
5226           break;
5227         case DataType::Type::kInt64:
5228         case DataType::Type::kUint64:
5229           __ LockXaddq(field_addr, valreg);
5230           break;
5231         default:
5232           LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
5233           UNREACHABLE();
5234       }
5235     }
5236   }
5237 }
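
// The non-byte-swapping integral path above boils down to a single atomic exchange-and-add,
// e.g. for an int field (illustrative operands, Intel order):
//   lock xadd [obj + offset], eax   ; EAX gets the old field value, the field gets old + addend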
5238 
5239 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5240                                           CodeGeneratorX86_64* codegen,
5241                                           GetAndUpdateOp get_and_update_op,
5242                                           bool need_any_store_barrier,
5243                                           bool need_any_any_barrier,
5244                                           bool byte_swap = false) {
5245   DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5246 
5247   X86_64Assembler* assembler = codegen->GetAssembler();
5248   LocationSummary* locations = invoke->GetLocations();
5249 
5250   // Get the type from the shorty as the invokes may not return a value.
5251   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5252   Location value = locations->InAt(number_of_arguments - 1);
5253   DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
5254 
5255   VarHandleSlowPathX86_64* slow_path = nullptr;
5256   VarHandleTarget target = GetVarHandleTarget(invoke);
5257   if (!byte_swap) {
5258     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
5259     GenerateVarHandleTarget(invoke, target, codegen);
5260     if (slow_path != nullptr) {
5261       slow_path->SetGetAndUpdateOp(get_and_update_op);
5262       slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
5263       slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
5264       __ Bind(slow_path->GetNativeByteOrderLabel());
5265     }
5266   }
5267 
5268   CpuRegister ref(target.object);
5269   Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
5270 
5271   if (need_any_store_barrier) {
5272     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5273   }
5274 
5275   switch (get_and_update_op) {
5276     case GetAndUpdateOp::kSet:
5277       GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
5278       break;
5279     case GetAndUpdateOp::kAdd:
5280       GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
5281       break;
5282     case GetAndUpdateOp::kBitwiseAnd:
5283     case GetAndUpdateOp::kBitwiseOr:
5284     case GetAndUpdateOp::kBitwiseXor:
5285       GenerateVarHandleGetAndOp(
5286           invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
5287       break;
5288   }
5289 
5290   if (need_any_any_barrier) {
5291     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5292   }
5293 
5294   if (slow_path != nullptr) {
5295     DCHECK(!byte_swap);
5296     __ Bind(slow_path->GetExitLabel());
5297   }
5298 }
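
// For orientation, the dispatch above covers the Java-level get-and-update family, roughly
// (`VH` is a hypothetical VarHandle for an int field; sketch only):
//   int oldA = (int) VH.getAndSet(obj, 1);          // GetAndUpdateOp::kSet
//   int oldB = (int) VH.getAndAdd(obj, 2);          // GetAndUpdateOp::kAdd
//   int oldC = (int) VH.getAndBitwiseOr(obj, 0x4);  // GetAndUpdateOp::kBitwiseOr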
5299 
5300 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5301   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5302 }
5303 
5304 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5305   // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5306   GenerateVarHandleGetAndUpdate(invoke,
5307                                 codegen_,
5308                                 GetAndUpdateOp::kSet,
5309                                 /*need_any_store_barrier=*/ true,
5310                                 /*need_any_any_barrier=*/ true);
5311 }
5312 
5313 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5314   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5315 }
5316 
5317 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5318   // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5319   GenerateVarHandleGetAndUpdate(invoke,
5320                                 codegen_,
5321                                 GetAndUpdateOp::kSet,
5322                                 /*need_any_store_barrier=*/ false,
5323                                 /*need_any_any_barrier=*/ false);
5324 }
5325 
5326 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5327   CreateVarHandleGetAndSetLocations(invoke, codegen_);
5328 }
5329 
5330 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5331   // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5332   GenerateVarHandleGetAndUpdate(invoke,
5333                                 codegen_,
5334                                 GetAndUpdateOp::kSet,
5335                                 /*need_any_store_barrier=*/ true,
5336                                 /*need_any_any_barrier=*/ false);
5337 }
5338 
5339 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5340   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5341 }
5342 
5343 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5344   // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5345   GenerateVarHandleGetAndUpdate(invoke,
5346                                 codegen_,
5347                                 GetAndUpdateOp::kAdd,
5348                                 /*need_any_store_barrier=*/ true,
5349                                 /*need_any_any_barrier=*/ true);
5350 }
5351 
5352 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5353   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5354 }
5355 
5356 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5357   // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5358   GenerateVarHandleGetAndUpdate(invoke,
5359                                 codegen_,
5360                                 GetAndUpdateOp::kAdd,
5361                                 /*need_any_store_barrier=*/ false,
5362                                 /*need_any_any_barrier=*/ false);
5363 }
5364 
5365 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5366   CreateVarHandleGetAndAddLocations(invoke, codegen_);
5367 }
5368 
5369 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5370   // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5371   GenerateVarHandleGetAndUpdate(invoke,
5372                                 codegen_,
5373                                 GetAndUpdateOp::kAdd,
5374                                 /*need_any_store_barrier=*/ true,
5375                                 /*need_any_any_barrier=*/ false);
5376 }
5377 
5378 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5379   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5380 }
5381 
5382 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5383   // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5384   GenerateVarHandleGetAndUpdate(invoke,
5385                                 codegen_,
5386                                 GetAndUpdateOp::kBitwiseAnd,
5387                                 /*need_any_store_barrier=*/ true,
5388                                 /*need_any_any_barrier=*/ true);
5389 }
5390 
5391 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5392   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5393 }
5394 
5395 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5396   // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5397   GenerateVarHandleGetAndUpdate(invoke,
5398                                 codegen_,
5399                                 GetAndUpdateOp::kBitwiseAnd,
5400                                 /*need_any_store_barrier=*/ false,
5401                                 /*need_any_any_barrier=*/ false);
5402 }
5403 
5404 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5405   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5406 }
5407 
5408 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5409   // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5410   GenerateVarHandleGetAndUpdate(invoke,
5411                                 codegen_,
5412                                 GetAndUpdateOp::kBitwiseAnd,
5413                                 /*need_any_store_barrier=*/ true,
5414                                 /*need_any_any_barrier=*/ false);
5415 }
5416 
5417 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5418   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5419 }
5420 
5421 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5422   // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5423   GenerateVarHandleGetAndUpdate(invoke,
5424                                 codegen_,
5425                                 GetAndUpdateOp::kBitwiseOr,
5426                                 /*need_any_store_barrier=*/ true,
5427                                 /*need_any_any_barrier=*/ true);
5428 }
5429 
5430 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5431   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5432 }
5433 
5434 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5435   // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5436   GenerateVarHandleGetAndUpdate(invoke,
5437                                 codegen_,
5438                                 GetAndUpdateOp::kBitwiseOr,
5439                                 /*need_any_store_barrier=*/ false,
5440                                 /*need_any_any_barrier=*/ false);
5441 }
5442 
5443 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5444   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5445 }
5446 
5447 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5448   // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5449   GenerateVarHandleGetAndUpdate(invoke,
5450                                 codegen_,
5451                                 GetAndUpdateOp::kBitwiseOr,
5452                                 /*need_any_store_barrier=*/ true,
5453                                 /*need_any_any_barrier=*/ false);
5454 }
5455 
5456 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5457   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5458 }
5459 
5460 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5461   // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5462   GenerateVarHandleGetAndUpdate(invoke,
5463                                 codegen_,
5464                                 GetAndUpdateOp::kBitwiseXor,
5465                                 /*need_any_store_barrier=*/ true,
5466                                 /*need_any_any_barrier=*/ true);
5467 }
5468 
5469 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5470   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5471 }
5472 
5473 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5474   // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5475   GenerateVarHandleGetAndUpdate(invoke,
5476                                 codegen_,
5477                                 GetAndUpdateOp::kBitwiseXor,
5478                                 /*need_any_store_barrier=*/ false,
5479                                 /*need_any_any_barrier=*/ false);
5480 }
5481 
5482 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5483   CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5484 }
5485 
5486 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5487   // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5488   GenerateVarHandleGetAndUpdate(invoke,
5489                                 codegen_,
5490                                 GetAndUpdateOp::kBitwiseXor,
5491                                 /*need_any_store_barrier=*/ true,
5492                                 /*need_any_any_barrier=*/ false);
5493 }
5494 
5495 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
5496   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5497   X86_64Assembler* assembler = codegen->GetAssembler();
5498 
5499   HInvoke* invoke = GetInvoke();
5500   LocationSummary* locations = invoke->GetLocations();
5501   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5502   DataType::Type value_type =
5503       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5504   DCHECK_NE(value_type, DataType::Type::kReference);
5505   size_t size = DataType::Size(value_type);
5506   DCHECK_GT(size, 1u);
5507 
5508   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
5509   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
5510   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
5511   CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>();
5512 
5513   MemberOffset class_offset = mirror::Object::ClassOffset();
5514   MemberOffset array_length_offset = mirror::Array::LengthOffset();
5515   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5516   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5517 
5518   VarHandleTarget target = GetVarHandleTarget(invoke);
5519 
5520   __ Bind(GetByteArrayViewCheckLabel());
5521 
5522   // The main path checked that the coordinateType0 is an array class that matches
5523   // the class of the actual coordinate argument but it does not match the value type.
5524   // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5525   codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5526   assembler->MaybePoisonHeapReference(temp);
5527   __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
5528   __ j(kNotEqual, GetEntryLabel());
5529 
5530   // Check for array index out of bounds.
5531   __ movl(temp, Address(object, array_length_offset.Int32Value()));
5532   // SUB sets flags in the same way as CMP.
5533   __ subl(temp, index);
5534   __ j(kBelowEqual, GetEntryLabel());
5535   // The difference between the array length and the index must be at least the `value_type` size.
5536   __ cmpl(temp, Immediate(size));
5537   __ j(kBelow, GetEntryLabel());
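  // Worked example (illustrative): for a view of long (size == 8) over a byte[] of length 100
  // with index 96, temp = 100 - 96 = 4, which is below 8, so we go to the runtime to throw.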
5538 
5539   // Construct the target.
5540   __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
5541 
5542   // Alignment check. For unaligned access, go to the runtime.
5543   DCHECK(IsPowerOfTwo(size));
5544   __ testl(CpuRegister(target.offset), Immediate(size - 1u));
5545   __ j(kNotZero, GetEntryLabel());
5546 
5547   // Byte order check. For native byte order return to the main path.
5548   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5549       IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5550     // There is no reason to differentiate between native byte order and byte-swap
5551     // for setting a zero bit pattern. Just return to the main path.
5552     __ jmp(GetNativeByteOrderLabel());
5553     return;
5554   }
5555   __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
5556   __ j(kNotEqual, GetNativeByteOrderLabel());
5557 
5558   switch (access_mode_template) {
5559     case mirror::VarHandle::AccessModeTemplate::kGet:
5560       GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
5561       break;
5562     case mirror::VarHandle::AccessModeTemplate::kSet:
5563       GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
5564       break;
5565     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5566       GenerateVarHandleCompareAndSetOrExchange(
5567           invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
5568       break;
5569     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5570       GenerateVarHandleCompareAndSetOrExchange(
5571           invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
5572       break;
5573     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5574       GenerateVarHandleGetAndUpdate(invoke,
5575                                     codegen,
5576                                     get_and_update_op_,
5577                                     need_any_store_barrier_,
5578                                     need_any_any_barrier_,
5579                                     /*byte_swap=*/ true);
5580       break;
5581   }
5582 
5583   __ jmp(GetExitLabel());
5584 }
5585 
5586 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
5587 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
5588 #undef MARK_UNIMPLEMENTED
5589 
5590 UNREACHABLE_INTRINSICS(X86_64)
5591 
5592 #undef __
5593 
5594 }  // namespace x86_64
5595 }  // namespace art
5596