1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86_64.h"
18 
19 #include <limits>
20 
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86_64.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_utils.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/object_array-inl.h"
32 #include "mirror/reference.h"
33 #include "mirror/string.h"
34 #include "scoped_thread_state_change-inl.h"
35 #include "thread-current-inl.h"
36 #include "utils/x86_64/assembler_x86_64.h"
37 #include "utils/x86_64/constants_x86_64.h"
38 
39 namespace art HIDDEN {
40 
41 namespace x86_64 {
42 
43 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
44   : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
45 }
46 
47 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
48   return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
49 }
50 
51 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
52   return codegen_->GetGraph()->GetAllocator();
53 }
54 
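// Builds locations for `invoke` if it is a recognized intrinsic; returns true
// only when an intrinsified LocationSummary was created for it.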
55 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
56   Dispatch(invoke);
57   LocationSummary* res = invoke->GetLocations();
58   if (res == nullptr) {
59     return false;
60   }
61   return res->Intrinsified();
62 }
63 
64 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
65 
66 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
67 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
68 
69 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
70 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
71  public:
72   explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
73       : SlowPathCode(instruction) {
74     DCHECK(gUseReadBarrier);
75     DCHECK(kUseBakerReadBarrier);
76   }
77 
78   void EmitNativeCode(CodeGenerator* codegen) override {
79     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
80     LocationSummary* locations = instruction_->GetLocations();
81     DCHECK(locations->CanCall());
82     DCHECK(instruction_->IsInvokeStaticOrDirect())
83         << "Unexpected instruction in read barrier arraycopy slow path: "
84         << instruction_->DebugName();
85     DCHECK(instruction_->GetLocations()->Intrinsified());
86     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
87 
88     int32_t element_size = DataType::Size(DataType::Type::kReference);
89 
90     CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
91     CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
92     CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
93 
94     __ Bind(GetEntryLabel());
95     NearLabel loop;
96     __ Bind(&loop);
97     __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
98     __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
99     // TODO: Inline the mark bit check before calling the runtime?
100     // TMP = ReadBarrier::Mark(TMP);
101     // No need to save live registers; it's taken care of by the
102     // entrypoint. Also, there is no need to update the stack mask,
103     // as this runtime call will not trigger a garbage collection.
104     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
105     // This runtime call does not require a stack map.
106     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
107     __ MaybePoisonHeapReference(CpuRegister(TMP));
108     __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
109     __ addl(src_curr_addr, Immediate(element_size));
110     __ addl(dst_curr_addr, Immediate(element_size));
111     __ cmpl(src_curr_addr, src_stop_addr);
112     __ j(kNotEqual, &loop);
113     __ jmp(GetExitLabel());
114   }
115 
116   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
117 
118  private:
119   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
120 };
121 
122 #undef __
123 
124 #define __ assembler->
125 
126 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
127   LocationSummary* locations =
128       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
129   locations->SetInAt(0, Location::RequiresFpuRegister());
130   locations->SetOut(Location::RequiresRegister());
131 }
132 
133 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
134   LocationSummary* locations =
135       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
136   locations->SetInAt(0, Location::RequiresRegister());
137   locations->SetOut(Location::RequiresFpuRegister());
138 }
139 
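// Float.floatToRawIntBits/intBitsToFloat and the double variants reduce to a
// raw bit move between an XMM register and a general-purpose register (movd,
// with the is64bit flag selecting a 32- or 64-bit move); no conversion occurs.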
140 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
141   Location input = locations->InAt(0);
142   Location output = locations->Out();
143   __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
144 }
145 
146 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
147   Location input = locations->InAt(0);
148   Location output = locations->Out();
149   __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
150 }
151 
152 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
153   CreateFPToIntLocations(allocator_, invoke);
154 }
155 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
156   CreateIntToFPLocations(allocator_, invoke);
157 }
158 
159 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
160   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
161 }
162 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
163   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
164 }
165 
166 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
167   CreateFPToIntLocations(allocator_, invoke);
168 }
169 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
170   CreateIntToFPLocations(allocator_, invoke);
171 }
172 
173 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
174   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
175 }
176 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
177   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
178 }
179 
180 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
181   LocationSummary* locations =
182       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
183   locations->SetInAt(0, Location::RequiresRegister());
184   locations->SetOut(Location::SameAsFirstInput());
185 }
186 
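// The reverseBytes intrinsics work in place (the output is SameAsFirstInput)
// and delegate the byte swap to the instruction code generator's shared
// Bswap() helper, parameterized by data type.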
187 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
188   CreateIntToIntLocations(allocator_, invoke);
189 }
190 
191 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
192   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
193 }
194 
195 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
196   CreateIntToIntLocations(allocator_, invoke);
197 }
198 
199 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
200   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
201 }
202 
203 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
204   CreateIntToIntLocations(allocator_, invoke);
205 }
206 
207 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
208   codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
209 }
210 
211 static void GenIsInfinite(LocationSummary* locations,
212                           bool is64bit,
213                           CodeGeneratorX86_64* codegen) {
214   X86_64Assembler* assembler = codegen->GetAssembler();
215 
216   XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
217   CpuRegister output = locations->Out().AsRegister<CpuRegister>();
218 
219   NearLabel done1, done2;
220 
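  // COMISS/COMISD set ZF, PF and CF: equality sets ZF and an unordered
  // comparison (NaN input) sets PF, so the kParityEven branches below leave
  // the result at 0 for NaN inputs.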
221   if (is64bit) {
222     double kPositiveInfinity = std::numeric_limits<double>::infinity();
223     double kNegativeInfinity = -1 * kPositiveInfinity;
224 
225      __ xorq(output, output);
226      __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
227      __ j(kNotEqual, &done1);
228      __ j(kParityEven, &done2);
229      __ movq(output, Immediate(1));
230      __ jmp(&done2);
231      __ Bind(&done1);
232      __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
233      __ j(kNotEqual, &done2);
234      __ j(kParityEven, &done2);
235      __ movq(output, Immediate(1));
236      __ Bind(&done2);
237   } else {
238     float kPositiveInfinity = std::numeric_limits<float>::infinity();
239     float kNegativeInfinity = -1 * kPositiveInfinity;
240 
241      __ xorl(output, output);
242      __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
243      __ j(kNotEqual, &done1);
244      __ j(kParityEven, &done2);
245      __ movl(output, Immediate(1));
246      __ jmp(&done2);
247      __ Bind(&done1);
248      __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
249      __ j(kNotEqual, &done2);
250      __ j(kParityEven, &done2);
251      __ movl(output, Immediate(1));
252      __ Bind(&done2);
253   }
254 }
255 
256 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
257   CreateFPToIntLocations(allocator_, invoke);
258 }
259 
260 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
261   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, codegen_);
262 }
263 
264 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
265   CreateFPToIntLocations(allocator_, invoke);
266 }
267 
268 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
269   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, codegen_);
270 }
271 
272 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
273   LocationSummary* locations =
274       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
275   locations->SetInAt(0, Location::RequiresFpuRegister());
276   locations->SetOut(Location::RequiresFpuRegister());
277 }
278 
279 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
280   CreateFPToFPLocations(allocator_, invoke);
281 }
282 
283 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
284   LocationSummary* locations = invoke->GetLocations();
285   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
286   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
287 
288   GetAssembler()->sqrtsd(out, in);
289 }
290 
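// Ceil/Floor/Rint (and the round intrinsics further down) are only
// intrinsified when SSE4.1 is available: if no locations are created here,
// TryDispatch() returns false and the call uses the regular implementation.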
291 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
292                                        HInvoke* invoke,
293                                        CodeGeneratorX86_64* codegen) {
294   // Do we have instruction support?
295   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
296     return;
297   }
298 
299   CreateFPToFPLocations(allocator, invoke);
300 }
301 
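// round_mode is the ROUNDSD immediate; bits [1:0] select the rounding mode:
// 0 = round to nearest (even), 1 = round toward -infinity (floor),
// 2 = round toward +infinity (ceil), matching Rint, Floor and Ceil below.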
302 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
303   LocationSummary* locations = invoke->GetLocations();
304   DCHECK(!locations->WillCall());
305   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
306   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
307   __ roundsd(out, in, Immediate(round_mode));
308 }
309 
310 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
311   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
312 }
313 
314 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
315   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
316 }
317 
318 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
319   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
320 }
321 
322 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
323   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
324 }
325 
326 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
327   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
328 }
329 
330 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
331   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
332 }
333 
334 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
335                                         HInvoke* invoke,
336                                         CodeGeneratorX86_64* codegen) {
337   // Do we have instruction support?
338   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
339     return;
340   }
341 
342   LocationSummary* locations =
343       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
344   locations->SetInAt(0, Location::RequiresFpuRegister());
345   locations->SetOut(Location::RequiresRegister());
346   locations->AddTemp(Location::RequiresFpuRegister());
347   locations->AddTemp(Location::RequiresFpuRegister());
348 }
349 
350 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
351   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
352 }
353 
354 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
355   LocationSummary* locations = invoke->GetLocations();
356   DCHECK(!locations->WillCall());
357 
358   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
359   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
360   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
361   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
362   NearLabel skip_incr, done;
363   X86_64Assembler* assembler = GetAssembler();
364 
365   // Since no direct x86 rounding instruction matches the required semantics,
366   // this intrinsic is implemented as follows:
367   //  result = floor(in);
368   //  if (in - result >= 0.5f)
369   //    result = result + 1.0f;
370   __ movss(t2, in);
371   __ roundss(t1, in, Immediate(1));
372   __ subss(t2, t1);
373   __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
374   __ j(kBelow, &skip_incr);
375   __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
376   __ Bind(&skip_incr);
377 
378   // Final conversion to an integer. Unfortunately this also does not have a
379   // direct x86 instruction, since NaN should map to 0 and large positive
380   // values need to be clipped to the extreme value.
381   codegen_->Load32BitValue(out, kPrimIntMax);
382   __ cvtsi2ss(t2, out);
383   __ comiss(t1, t2);
384   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
385   __ movl(out, Immediate(0));  // does not change flags
386   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
387   __ cvttss2si(out, t1);
388   __ Bind(&done);
389 }
390 
391 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
392   CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
393 }
394 
395 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
396   LocationSummary* locations = invoke->GetLocations();
397   DCHECK(!locations->WillCall());
398 
399   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
400   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
401   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
402   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
403   NearLabel skip_incr, done;
404   X86_64Assembler* assembler = GetAssembler();
405 
406   // Since no direct x86 rounding instruction matches the required semantics,
407   // this intrinsic is implemented as follows:
408   //  result = floor(in);
409   //  if (in - result >= 0.5)
410   //    result = result + 1.0f;
411   __ movsd(t2, in);
412   __ roundsd(t1, in, Immediate(1));
413   __ subsd(t2, t1);
414   __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
415   __ j(kBelow, &skip_incr);
416   __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
417   __ Bind(&skip_incr);
418 
419   // Final conversion to an integer. Unfortunately this also does not have a
420   // direct x86 instruction, since NaN should map to 0 and large positive
421   // values need to be clipped to the extreme value.
422   codegen_->Load64BitValue(out, kPrimLongMax);
423   __ cvtsi2sd(t2, out, /* is64bit= */ true);
424   __ comisd(t1, t2);
425   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
426   __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
427   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
428   __ cvttsd2si(out, t1, /* is64bit= */ true);
429   __ Bind(&done);
430 }
431 
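// The transcendental Math intrinsics below are not inlined: each expands into
// a call to its quick runtime entrypoint, taking the argument in the first FP
// register of the runtime calling convention and returning the result in XMM0.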
432 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
433   LocationSummary* locations =
434       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
435   InvokeRuntimeCallingConvention calling_convention;
436   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
437   locations->SetOut(Location::FpuRegisterLocation(XMM0));
438 
439   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
440 }
441 
442 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
443                           QuickEntrypointEnum entry) {
444   LocationSummary* locations = invoke->GetLocations();
445   DCHECK(locations->WillCall());
446   DCHECK(invoke->IsInvokeStaticOrDirect());
447 
448   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
449 }
450 
451 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
452   CreateFPToFPCallLocations(allocator_, invoke);
453 }
454 
455 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
456   GenFPToFPCall(invoke, codegen_, kQuickCos);
457 }
458 
459 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
460   CreateFPToFPCallLocations(allocator_, invoke);
461 }
462 
463 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
464   GenFPToFPCall(invoke, codegen_, kQuickSin);
465 }
466 
467 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
468   CreateFPToFPCallLocations(allocator_, invoke);
469 }
470 
471 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
472   GenFPToFPCall(invoke, codegen_, kQuickAcos);
473 }
474 
475 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
476   CreateFPToFPCallLocations(allocator_, invoke);
477 }
478 
479 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
480   GenFPToFPCall(invoke, codegen_, kQuickAsin);
481 }
482 
483 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
484   CreateFPToFPCallLocations(allocator_, invoke);
485 }
486 
487 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
488   GenFPToFPCall(invoke, codegen_, kQuickAtan);
489 }
490 
491 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
492   CreateFPToFPCallLocations(allocator_, invoke);
493 }
494 
495 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
496   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
497 }
498 
499 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
500   CreateFPToFPCallLocations(allocator_, invoke);
501 }
502 
503 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
504   GenFPToFPCall(invoke, codegen_, kQuickCosh);
505 }
506 
507 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
508   CreateFPToFPCallLocations(allocator_, invoke);
509 }
510 
511 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
512   GenFPToFPCall(invoke, codegen_, kQuickExp);
513 }
514 
515 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
516   CreateFPToFPCallLocations(allocator_, invoke);
517 }
518 
519 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
520   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
521 }
522 
523 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
524   CreateFPToFPCallLocations(allocator_, invoke);
525 }
526 
527 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
528   GenFPToFPCall(invoke, codegen_, kQuickLog);
529 }
530 
531 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
532   CreateFPToFPCallLocations(allocator_, invoke);
533 }
534 
535 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
536   GenFPToFPCall(invoke, codegen_, kQuickLog10);
537 }
538 
539 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
540   CreateFPToFPCallLocations(allocator_, invoke);
541 }
542 
543 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
544   GenFPToFPCall(invoke, codegen_, kQuickSinh);
545 }
546 
547 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
548   CreateFPToFPCallLocations(allocator_, invoke);
549 }
550 
551 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
552   GenFPToFPCall(invoke, codegen_, kQuickTan);
553 }
554 
555 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
556   CreateFPToFPCallLocations(allocator_, invoke);
557 }
558 
559 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
560   GenFPToFPCall(invoke, codegen_, kQuickTanh);
561 }
562 
563 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
564   LocationSummary* locations =
565       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
566   InvokeRuntimeCallingConvention calling_convention;
567   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
568   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
569   locations->SetOut(Location::FpuRegisterLocation(XMM0));
570 
571   CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
572 }
573 
574 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
575   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
576   LocationSummary* locations =
577       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
578   InvokeRuntimeCallingConvention calling_convention;
579   locations->SetInAt(0, Location::RequiresFpuRegister());
580   locations->SetInAt(1, Location::RequiresFpuRegister());
581   locations->SetInAt(2, Location::RequiresFpuRegister());
582   locations->SetOut(Location::SameAsFirstInput());
583 }
584 
585 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
586   CreateFPFPToFPCallLocations(allocator_, invoke);
587 }
588 
589 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
590   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
591 }
592 
593 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
594   CreateFPFPToFPCallLocations(allocator_, invoke);
595 }
596 
597 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
598   GenFPToFPCall(invoke, codegen_, kQuickPow);
599 }
600 
601 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
602   CreateFPFPToFPCallLocations(allocator_, invoke);
603 }
604 
605 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
606   GenFPToFPCall(invoke, codegen_, kQuickHypot);
607 }
608 
609 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
610   CreateFPFPToFPCallLocations(allocator_, invoke);
611 }
612 
613 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
614   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
615 }
616 
617 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
618   // Check to see if we have known failures that will cause us to have to bail out
619   // to the runtime, and just generate the runtime call directly.
620   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
621   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
622 
623   // The positions must be non-negative.
624   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
625       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
626     // We will have to fail anyways.
627     return;
628   }
629 
630   // The length must be > 0.
631   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
632   if (length != nullptr) {
633     int32_t len = length->GetValue();
634     if (len < 0) {
635       // Just call as normal.
636       return;
637     }
638   }
639   LocationSummary* locations =
640       new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
641       (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
642   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
643   locations->SetInAt(0, Location::RequiresRegister());
644   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
645   locations->SetInAt(2, Location::RequiresRegister());
646   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
647   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
648 
649   // And we need some temporaries.  We will use REP MOVSB/W/L, so we need fixed registers.
650   locations->AddTemp(Location::RegisterLocation(RSI));
651   locations->AddTemp(Location::RegisterLocation(RDI));
652   locations->AddTemp(Location::RegisterLocation(RCX));
653 }
654 
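// Emits the range checks for one side of an arraycopy: jumps to `slow_path`
// unless 0 <= pos <= length(input) and (length(input) - pos) >= length. When
// `length_is_input_length` is set, the copy length equals the input length,
// so a non-constant `pos` can only succeed if it is zero.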
655 static void CheckPosition(X86_64Assembler* assembler,
656                           Location pos,
657                           CpuRegister input,
658                           Location length,
659                           SlowPathCode* slow_path,
660                           CpuRegister temp,
661                           bool length_is_input_length = false) {
662   // Where is the length in the Array?
663   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
664 
665   if (pos.IsConstant()) {
666     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
667     if (pos_const == 0) {
668       if (!length_is_input_length) {
669         // Check that length(input) >= length.
670         if (length.IsConstant()) {
671           __ cmpl(Address(input, length_offset),
672                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
673         } else {
674           __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
675         }
676         __ j(kLess, slow_path->GetEntryLabel());
677       }
678     } else {
679       // Check that length(input) >= pos.
680       __ movl(temp, Address(input, length_offset));
681       __ subl(temp, Immediate(pos_const));
682       __ j(kLess, slow_path->GetEntryLabel());
683 
684       // Check that (length(input) - pos) >= length.
685       if (length.IsConstant()) {
686         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
687       } else {
688         __ cmpl(temp, length.AsRegister<CpuRegister>());
689       }
690       __ j(kLess, slow_path->GetEntryLabel());
691     }
692   } else if (length_is_input_length) {
693     // The only way the copy can succeed is if pos is zero.
694     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
695     __ testl(pos_reg, pos_reg);
696     __ j(kNotEqual, slow_path->GetEntryLabel());
697   } else {
698     // Check that pos >= 0.
699     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
700     __ testl(pos_reg, pos_reg);
701     __ j(kLess, slow_path->GetEntryLabel());
702 
703     // Check that pos <= length(input).
704     __ cmpl(Address(input, length_offset), pos_reg);
705     __ j(kLess, slow_path->GetEntryLabel());
706 
707     // Check that (length(input) - pos) >= length.
708     __ movl(temp, Address(input, length_offset));
709     __ subl(temp, pos_reg);
710     if (length.IsConstant()) {
711       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
712     } else {
713       __ cmpl(temp, length.AsRegister<CpuRegister>());
714     }
715     __ j(kLess, slow_path->GetEntryLabel());
716   }
717 }
718 
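// Copies a primitive array region using REP MOVSB/MOVSW/MOVSL. These string
// move instructions implicitly use RSI (source), RDI (destination) and RCX
// (count), which is why CreateSystemArrayCopyLocations pins the temps to them.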
719 static void SystemArrayCopyPrimitive(HInvoke* invoke,
720                                      X86_64Assembler* assembler,
721                                      CodeGeneratorX86_64* codegen,
722                                      DataType::Type type) {
723   LocationSummary* locations = invoke->GetLocations();
724   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
725   Location src_pos = locations->InAt(1);
726   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
727   Location dest_pos = locations->InAt(3);
728   Location length = locations->InAt(4);
729 
730   // Temporaries that we need for MOVSB/W/L.
731   CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
732   DCHECK_EQ(src_base.AsRegister(), RSI);
733   CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
734   DCHECK_EQ(dest_base.AsRegister(), RDI);
735   CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
736   DCHECK_EQ(count.AsRegister(), RCX);
737 
738   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
739   codegen->AddSlowPath(slow_path);
740 
741   // Bail out if the source and destination are the same.
742   __ cmpl(src, dest);
743   __ j(kEqual, slow_path->GetEntryLabel());
744 
745   // Bail out if the source is null.
746   __ testl(src, src);
747   __ j(kEqual, slow_path->GetEntryLabel());
748 
749   // Bail out if the destination is null.
750   __ testl(dest, dest);
751   __ j(kEqual, slow_path->GetEntryLabel());
752 
753   // If the length is negative, bail out.
754   // We have already checked in the LocationsBuilder for the constant case.
755   if (!length.IsConstant()) {
756     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
757     __ j(kLess, slow_path->GetEntryLabel());
758   }
759 
760   // Validity checks: source. Use src_base as a temporary register.
761   CheckPosition(assembler, src_pos, src, length, slow_path, src_base);
762 
763   // Validity checks: dest. Use src_base as a temporary register.
764   CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);
765 
766   // We need the count in RCX.
767   if (length.IsConstant()) {
768     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
769   } else {
770     __ movl(count, length.AsRegister<CpuRegister>());
771   }
772 
773   // Okay, everything checks out.  Finally time to do the copy.
774   // The element size and address scale factor used below are derived from `type`.
775   const size_t data_size = DataType::Size(type);
776   const ScaleFactor scale_factor = CodeGenerator::ScaleFactorForType(type);
777   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
778 
779   if (src_pos.IsConstant()) {
780     int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
781     __ leal(src_base, Address(src, data_size * src_pos_const + data_offset));
782   } else {
783     __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
784   }
785   if (dest_pos.IsConstant()) {
786     int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
787     __ leal(dest_base, Address(dest, data_size * dest_pos_const + data_offset));
788   } else {
789     __ leal(dest_base,
790             Address(dest, dest_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
791   }
792 
793   // Do the move.
794   switch (type) {
795     case DataType::Type::kInt8:
796        __ rep_movsb();
797        break;
798     case DataType::Type::kUint16:
799        __ rep_movsw();
800        break;
801     case DataType::Type::kInt32:
802        __ rep_movsl();
803        break;
804     default:
805        LOG(FATAL) << "Unexpected data type for intrinsic";
806   }
807   __ Bind(slow_path->GetExitLabel());
808 }
809 
810 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
811   CreateSystemArrayCopyLocations(invoke);
812 }
813 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
814   X86_64Assembler* assembler = GetAssembler();
815   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
816 }
817 
818 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
819   X86_64Assembler* assembler = GetAssembler();
820   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
821 }
822 
823 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
824   CreateSystemArrayCopyLocations(invoke);
825 }
826 
827 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
828   X86_64Assembler* assembler = GetAssembler();
829   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
830 }
831 
832 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
833   CreateSystemArrayCopyLocations(invoke);
834 }
835 
836 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
837   // The only read barrier implementation supporting the
838   // SystemArrayCopy intrinsic is the Baker-style read barriers.
839   if (gUseReadBarrier && !kUseBakerReadBarrier) {
840     return;
841   }
842 
843   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
844 }
845 
846 // Compute base source address, base destination address, and end
847 // source address for the System.arraycopy intrinsic in `src_base`,
848 // `dst_base` and `src_end` respectively.
849 static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
850                                         DataType::Type type,
851                                         const CpuRegister& src,
852                                         const Location& src_pos,
853                                         const CpuRegister& dst,
854                                         const Location& dst_pos,
855                                         const Location& copy_length,
856                                         const CpuRegister& src_base,
857                                         const CpuRegister& dst_base,
858                                         const CpuRegister& src_end) {
859   // This routine is only used by the SystemArrayCopy intrinsic.
860   DCHECK_EQ(type, DataType::Type::kReference);
861   const int32_t element_size = DataType::Size(type);
862   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
863   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
864 
865   if (src_pos.IsConstant()) {
866     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
867     __ leal(src_base, Address(src, element_size * constant + data_offset));
868   } else {
869     __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
870   }
871 
872   if (dst_pos.IsConstant()) {
873     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
874     __ leal(dst_base, Address(dst, element_size * constant + data_offset));
875   } else {
876     __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
877   }
878 
879   if (copy_length.IsConstant()) {
880     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
881     __ leal(src_end, Address(src_base, element_size * constant));
882   } else {
883     __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
884   }
885 }
886 
887 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
888   // The only read barrier implementation supporting the
889   // SystemArrayCopy intrinsic is the Baker-style read barriers.
890   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
891 
892   X86_64Assembler* assembler = GetAssembler();
893   LocationSummary* locations = invoke->GetLocations();
894 
895   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
896   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
897   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
898   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
899   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
900 
901   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
902   Location src_pos = locations->InAt(1);
903   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
904   Location dest_pos = locations->InAt(3);
905   Location length = locations->InAt(4);
906   Location temp1_loc = locations->GetTemp(0);
907   CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
908   Location temp2_loc = locations->GetTemp(1);
909   CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
910   Location temp3_loc = locations->GetTemp(2);
911   CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
912   Location TMP_loc = Location::RegisterLocation(TMP);
913 
914   SlowPathCode* intrinsic_slow_path =
915       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
916   codegen_->AddSlowPath(intrinsic_slow_path);
917 
918   NearLabel conditions_on_positions_validated;
919   SystemArrayCopyOptimizations optimizations(invoke);
920 
921   // If source and destination are the same, we go to slow path if we need to do
922   // forward copying.
923   if (src_pos.IsConstant()) {
924     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
925     if (dest_pos.IsConstant()) {
926       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
927       if (optimizations.GetDestinationIsSource()) {
928         // Checked when building locations.
929         DCHECK_GE(src_pos_constant, dest_pos_constant);
930       } else if (src_pos_constant < dest_pos_constant) {
931         __ cmpl(src, dest);
932         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
933       }
934     } else {
935       if (!optimizations.GetDestinationIsSource()) {
936         __ cmpl(src, dest);
937         __ j(kNotEqual, &conditions_on_positions_validated);
938       }
939       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
940       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
941     }
942   } else {
943     if (!optimizations.GetDestinationIsSource()) {
944       __ cmpl(src, dest);
945       __ j(kNotEqual, &conditions_on_positions_validated);
946     }
947     if (dest_pos.IsConstant()) {
948       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
949       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
950       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
951     } else {
952       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
953       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
954     }
955   }
956 
957   __ Bind(&conditions_on_positions_validated);
958 
959   if (!optimizations.GetSourceIsNotNull()) {
960     // Bail out if the source is null.
961     __ testl(src, src);
962     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
963   }
964 
965   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
966     // Bail out if the destination is null.
967     __ testl(dest, dest);
968     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
969   }
970 
971   // If the length is negative, bail out.
972   // We have already checked in the LocationsBuilder for the constant case.
973   if (!length.IsConstant() &&
974       !optimizations.GetCountIsSourceLength() &&
975       !optimizations.GetCountIsDestinationLength()) {
976     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
977     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
978   }
979 
980   // Validity checks: source.
981   CheckPosition(assembler,
982                 src_pos,
983                 src,
984                 length,
985                 intrinsic_slow_path,
986                 temp1,
987                 optimizations.GetCountIsSourceLength());
988 
989   // Validity checks: dest.
990   CheckPosition(assembler,
991                 dest_pos,
992                 dest,
993                 length,
994                 intrinsic_slow_path,
995                 temp1,
996                 optimizations.GetCountIsDestinationLength());
997 
998   if (!optimizations.GetDoesNotNeedTypeCheck()) {
999     // Check whether all elements of the source array are assignable to the component
1000     // type of the destination array. We do two checks: the classes are the same,
1001     // or the destination is Object[]. If none of these checks succeed, we go to the
1002     // slow path.
1003 
1004     bool did_unpoison = false;
1005     if (gUseReadBarrier && kUseBakerReadBarrier) {
1006       // /* HeapReference<Class> */ temp1 = dest->klass_
1007       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1008           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1009       // Register `temp1` is not trashed by the read barrier emitted
1010       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1011       // method produces a call to a ReadBarrierMarkRegX entry point,
1012       // which saves all potentially live registers, including
1013       // temporaries such as `temp1`.
1014       // /* HeapReference<Class> */ temp2 = src->klass_
1015       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1016           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1017       // If heap poisoning is enabled, `temp1` and `temp2` have been
1018       // unpoisoned by the previous calls to
1019       // GenerateFieldLoadWithBakerReadBarrier.
1020     } else {
1021       // /* HeapReference<Class> */ temp1 = dest->klass_
1022       __ movl(temp1, Address(dest, class_offset));
1023       // /* HeapReference<Class> */ temp2 = src->klass_
1024       __ movl(temp2, Address(src, class_offset));
1025       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1026           !optimizations.GetSourceIsNonPrimitiveArray()) {
1027         // One or two of the references need to be unpoisoned. Unpoison them
1028         // both to make the identity check valid.
1029         __ MaybeUnpoisonHeapReference(temp1);
1030         __ MaybeUnpoisonHeapReference(temp2);
1031         did_unpoison = true;
1032       }
1033     }
1034 
1035     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1036       // Bail out if the destination is not a non primitive array.
1037       if (gUseReadBarrier && kUseBakerReadBarrier) {
1038         // /* HeapReference<Class> */ TMP = temp1->component_type_
1039         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1040             invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
1041         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1042         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1043         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1044         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1045       } else {
1046         // /* HeapReference<Class> */ TMP = temp1->component_type_
1047         __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1048         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1049         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1050         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1051       }
1052       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1053       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1054     }
1055 
1056     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1057       // Bail out if the source is not a non primitive array.
1058       if (gUseReadBarrier && kUseBakerReadBarrier) {
1059         // For the same reason given earlier, `temp1` is not trashed by the
1060         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1061         // /* HeapReference<Class> */ TMP = temp2->component_type_
1062         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1063             invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false);
1064         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1065         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1066         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1067         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1068       } else {
1069         // /* HeapReference<Class> */ TMP = temp2->component_type_
1070         __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1071         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1072         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1073         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1074       }
1075       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1076       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1077     }
1078 
1079     __ cmpl(temp1, temp2);
1080 
1081     if (optimizations.GetDestinationIsTypedObjectArray()) {
1082       NearLabel do_copy;
1083       __ j(kEqual, &do_copy);
1084       if (gUseReadBarrier && kUseBakerReadBarrier) {
1085         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1086         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1087             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
1088         // We do not need to emit a read barrier for the following
1089         // heap reference load, as `temp1` is only used in a
1090         // comparison with null below, and this reference is not
1091         // kept afterwards.
1092         __ cmpl(Address(temp1, super_offset), Immediate(0));
1093       } else {
1094         if (!did_unpoison) {
1095           __ MaybeUnpoisonHeapReference(temp1);
1096         }
1097         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1098         __ movl(temp1, Address(temp1, component_offset));
1099         __ MaybeUnpoisonHeapReference(temp1);
1100         // No need to unpoison the following heap reference load, as
1101         // we're comparing against null.
1102         __ cmpl(Address(temp1, super_offset), Immediate(0));
1103       }
1104       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1105       __ Bind(&do_copy);
1106     } else {
1107       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1108     }
1109   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1110     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1111     // Bail out if the source is not a non primitive array.
1112     if (gUseReadBarrier && kUseBakerReadBarrier) {
1113       // /* HeapReference<Class> */ temp1 = src->klass_
1114       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1115           invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
1116       // /* HeapReference<Class> */ TMP = temp1->component_type_
1117       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1118           invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
1119       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1120       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1121     } else {
1122       // /* HeapReference<Class> */ temp1 = src->klass_
1123       __ movl(temp1, Address(src, class_offset));
1124       __ MaybeUnpoisonHeapReference(temp1);
1125       // /* HeapReference<Class> */ TMP = temp1->component_type_
1126       __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1127       // No need to unpoison `TMP` now, as we're comparing against null.
1128       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1129       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1130       __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1131     }
1132     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1133     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1134   }
1135 
1136   const DataType::Type type = DataType::Type::kReference;
1137   const int32_t element_size = DataType::Size(type);
1138 
1139   // Compute base source address, base destination address, and end
1140   // source address in `temp1`, `temp2` and `temp3` respectively.
1141   GenSystemArrayCopyAddresses(
1142       GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
1143 
1144   if (gUseReadBarrier && kUseBakerReadBarrier) {
1145     // SystemArrayCopy implementation for Baker read barriers (see
1146     // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1147     //
1148     //   if (src_ptr != end_ptr) {
1149     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1150     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1151     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
1152     //     if (is_gray) {
1153     //       // Slow-path copy.
1154     //       do {
1155     //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1156     //       } while (src_ptr != end_ptr)
1157     //     } else {
1158     //       // Fast-path copy.
1159     //       do {
1160     //         *dest_ptr++ = *src_ptr++;
1161     //       } while (src_ptr != end_ptr)
1162     //     }
1163     //   }
1164 
1165     NearLabel loop, done;
1166 
1167     // Don't enter copy loop if `length == 0`.
1168     __ cmpl(temp1, temp3);
1169     __ j(kEqual, &done);
1170 
1171     // Given the numeric representation, it's enough to check the low bit of the rb_state.
1172     static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1173     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1174     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1175     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1176     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1177 
1178     // if (rb_state == ReadBarrier::GrayState())
1179     //   goto slow_path;
1180     // At this point, just do the "if" and make sure that flags are preserved until the branch.
1181     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1182 
1183     // Load fence to prevent load-load reordering.
1184     // Note that this is a no-op, thanks to the x86-64 memory model.
1185     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1186 
1187     // Slow path used to copy array when `src` is gray.
1188     SlowPathCode* read_barrier_slow_path =
1189         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1190     codegen_->AddSlowPath(read_barrier_slow_path);
1191 
1192     // We have done the "if" of the gray bit check above; now branch based on the flags.
1193     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1194 
1195     // Fast-path copy.
1196     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1197     // poison/unpoison.
1198     __ Bind(&loop);
1199     __ movl(CpuRegister(TMP), Address(temp1, 0));
1200     __ movl(Address(temp2, 0), CpuRegister(TMP));
1201     __ addl(temp1, Immediate(element_size));
1202     __ addl(temp2, Immediate(element_size));
1203     __ cmpl(temp1, temp3);
1204     __ j(kNotEqual, &loop);
1205 
1206     __ Bind(read_barrier_slow_path->GetExitLabel());
1207     __ Bind(&done);
1208   } else {
1209     // Non read barrier code.
1210 
1211     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1212     // poison/unpoison.
1213     NearLabel loop, done;
1214     __ cmpl(temp1, temp3);
1215     __ j(kEqual, &done);
1216     __ Bind(&loop);
1217     __ movl(CpuRegister(TMP), Address(temp1, 0));
1218     __ movl(Address(temp2, 0), CpuRegister(TMP));
1219     __ addl(temp1, Immediate(element_size));
1220     __ addl(temp2, Immediate(element_size));
1221     __ cmpl(temp1, temp3);
1222     __ j(kNotEqual, &loop);
1223     __ Bind(&done);
1224   }
1225 
1226   // We only need one card marking on the destination array.
1227   codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false);
1228 
1229   __ Bind(intrinsic_slow_path->GetExitLabel());
1230 }
1231 
1232 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1233   LocationSummary* locations = new (allocator_) LocationSummary(
1234       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1235   InvokeRuntimeCallingConvention calling_convention;
1236   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1237   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1238   locations->SetOut(Location::RegisterLocation(RAX));
1239 }
1240 
1241 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1242   X86_64Assembler* assembler = GetAssembler();
1243   LocationSummary* locations = invoke->GetLocations();
1244 
1245   // Note that the null check must have been done earlier.
1246   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1247 
1248   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1249   __ testl(argument, argument);
1250   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1251   codegen_->AddSlowPath(slow_path);
1252   __ j(kEqual, slow_path->GetEntryLabel());
1253 
1254   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1255   __ Bind(slow_path->GetExitLabel());
1256 }
1257 
1258 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1259   LocationSummary* locations =
1260       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1261   locations->SetInAt(0, Location::RequiresRegister());
1262   locations->SetInAt(1, Location::RequiresRegister());
1263 
1264   // Request temporary registers: RCX and RDI are needed for the repe_cmpsq instruction.
1265   locations->AddTemp(Location::RegisterLocation(RCX));
1266   locations->AddTemp(Location::RegisterLocation(RDI));
1267 
1268   // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1269   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1270 }
1271 
1272 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1273   X86_64Assembler* assembler = GetAssembler();
1274   LocationSummary* locations = invoke->GetLocations();
1275 
1276   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1277   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1278   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1279   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1280   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1281 
1282   NearLabel end, return_true, return_false;
1283 
1284   // Get offsets of count, value, and class fields within a string object.
1285   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1286   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1287   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1288 
1289   // Note that the null check must have been done earlier.
1290   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1291 
1292   StringEqualsOptimizations optimizations(invoke);
1293   if (!optimizations.GetArgumentNotNull()) {
1294     // Check if input is null, return false if it is.
1295     __ testl(arg, arg);
1296     __ j(kEqual, &return_false);
1297   }
1298 
1299   if (!optimizations.GetArgumentIsString()) {
1300     // Instanceof check for the argument by comparing class fields.
1301     // All string objects must have the same type since String cannot be subclassed.
1302     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1303     // If the argument is a string object, its class field must be equal to receiver's class field.
1304     //
1305     // As the String class is expected to be non-movable, we can read the class
1306     // field from String.equals' arguments without read barriers.
1307     AssertNonMovableStringClass();
1308     // Also, because we use the loaded class references only to compare them, we
1309     // don't need to unpoison them.
1310     // /* HeapReference<Class> */ rcx = str->klass_
1311     __ movl(rcx, Address(str, class_offset));
1312     // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1313     __ cmpl(rcx, Address(arg, class_offset));
1314     __ j(kNotEqual, &return_false);
1315   }
1316 
1317   // Reference equality check, return true if same reference.
1318   __ cmpl(str, arg);
1319   __ j(kEqual, &return_true);
1320 
1321   // Load length and compression flag of receiver string.
1322   __ movl(rcx, Address(str, count_offset));
1323   // Check if lengths and compression flags are equal; return false if they're not.
1324   // Two identical strings will always have same compression style since
1325   // compression style is decided on alloc.
1326   __ cmpl(rcx, Address(arg, count_offset));
1327   __ j(kNotEqual, &return_false);
1328   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1329   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1330                 "Expecting 0=compressed, 1=uncompressed");
1331   __ jrcxz(&return_true);
1332 
1333   if (mirror::kUseStringCompression) {
1334     NearLabel string_uncompressed;
1335     // Extract the length and distinguish the both-compressed from the both-uncompressed case.
1336     // Strings with different compression styles were rejected above.
1337     __ shrl(rcx, Immediate(1));
1338     __ j(kCarrySet, &string_uncompressed);
1339     // Divide string length by 2, rounding up, and continue as if uncompressed.
1340     // Merge clearing the compression flag with +1 for rounding.
1341     __ addl(rcx, Immediate(1));
1342     __ shrl(rcx, Immediate(1));
1343     __ Bind(&string_uncompressed);
1344   }
1345   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1346   __ leal(rsi, Address(str, value_offset));
1347   __ leal(rdi, Address(arg, value_offset));
1348 
1349   // Divide string length by 4 and adjust for lengths not divisible by 4.
1350   __ addl(rcx, Immediate(3));
1351   __ shrl(rcx, Immediate(2));
1352 
1353   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1354   // or 8 characters (compressed) at a time.
1355   DCHECK_ALIGNED(value_offset, 8);
1356   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
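  // Worked example (added for clarity; not in the original source): `count` holds
  // `(length << 1) | compression_flag`, with 0 = compressed (one byte per char) and
  // 1 = uncompressed, as the static_assert above states. For a compressed string of length 10,
  // count == 20; `shrl rcx, 1` leaves rcx == 10 with CF clear, and the rounding-up path yields
  // (10 + 1) >> 1 == 5 "char units". The `addl 3; shrl 2` above then gives (5 + 3) >> 2 == 2
  // quad-words, i.e. 16 bytes compared, which safely covers the 10 data bytes because the
  // value data is zero padded up to kObjectAlignment.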
1357 
1358   // Loop comparing eight bytes (4 uncompressed or 8 compressed chars) at a time from the start.
1359   __ repe_cmpsq();
1360   // If strings are not equal, zero flag will be cleared.
1361   __ j(kNotEqual, &return_false);
1362 
1363   // Return true and exit the function.
1364   // If loop does not result in returning false, we return true.
1365   __ Bind(&return_true);
1366   __ movl(rsi, Immediate(1));
1367   __ jmp(&end);
1368 
1369   // Return false and exit the function.
1370   __ Bind(&return_false);
1371   __ xorl(rsi, rsi);
1372   __ Bind(&end);
1373 }
1374 
1375 static void CreateStringIndexOfLocations(HInvoke* invoke,
1376                                          ArenaAllocator* allocator,
1377                                          bool start_at_zero) {
1378   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1379                                                                LocationSummary::kCallOnSlowPath,
1380                                                                kIntrinsified);
1381   // The data needs to be in RDI for scasw. So request that the string is there, anyway.
1382   locations->SetInAt(0, Location::RegisterLocation(RDI));
1383   // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1384   // allocator to do that, anyway. We can still do the constant check by checking the parameter
1385   // of the instruction explicitly.
1386   // Note: This works as we don't clobber RAX anywhere.
1387   locations->SetInAt(1, Location::RegisterLocation(RAX));
1388   if (!start_at_zero) {
1389     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1390   }
1391   // As we clobber RDI during execution anyway, also use it as the output.
1392   locations->SetOut(Location::SameAsFirstInput());
1393 
1394   // repne scasw uses RCX as the counter.
1395   locations->AddTemp(Location::RegisterLocation(RCX));
1396   // Need another temporary to be able to compute the result.
1397   locations->AddTemp(Location::RequiresRegister());
1398 }
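// Rough sketch of the repne scasw contract the fixed registers above rely on (illustrative
// only, written as C-like pseudocode):
//
//   uint16_t* cursor = rdi;      // comparison address
//   uint16_t needle = ax;        // value being searched for
//   uint64_t remaining = rcx;    // counter
//   while (remaining != 0) {
//     remaining--;
//     if (*cursor++ == needle) break;   // ZF is set on a match
//   }
//
// This is why the string data pointer must end up in RDI, the char in RAX and the counter
// in RCX before the scan starts.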
1399 
1400 static void GenerateStringIndexOf(HInvoke* invoke,
1401                                   X86_64Assembler* assembler,
1402                                   CodeGeneratorX86_64* codegen,
1403                                   bool start_at_zero) {
1404   LocationSummary* locations = invoke->GetLocations();
1405 
1406   // Note that the null check must have been done earlier.
1407   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1408 
1409   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1410   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1411   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1412   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1413   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1414 
1415   // Check our assumptions for registers.
1416   DCHECK_EQ(string_obj.AsRegister(), RDI);
1417   DCHECK_EQ(search_value.AsRegister(), RAX);
1418   DCHECK_EQ(counter.AsRegister(), RCX);
1419   DCHECK_EQ(out.AsRegister(), RDI);
1420 
1421   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1422   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1423   SlowPathCode* slow_path = nullptr;
1424   HInstruction* code_point = invoke->InputAt(1);
1425   if (code_point->IsIntConstant()) {
1426     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1427         std::numeric_limits<uint16_t>::max()) {
1428       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1429       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1430       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1431       codegen->AddSlowPath(slow_path);
1432       __ jmp(slow_path->GetEntryLabel());
1433       __ Bind(slow_path->GetExitLabel());
1434       return;
1435     }
1436   } else if (code_point->GetType() != DataType::Type::kUint16) {
1437     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1438     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1439     codegen->AddSlowPath(slow_path);
1440     __ j(kAbove, slow_path->GetEntryLabel());
1441   }
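  // Added note: values above 0xFFFF are supplementary code points, which java.lang.String
  // stores as a surrogate pair rather than a single char. For example U+1F600 is encoded as
  // the two UTF-16 units 0xD83D 0xDE00, so indexOf(0x1F600) cannot be answered by the
  // single-unit repne scasw scan below and takes the runtime slow path instead.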
1442 
1443   // From here down, we know that we are looking for a char that fits in
1444   // 16 bits (uncompressed) or 8 bits (compressed).
1445   // Location of reference to data array within the String object.
1446   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1447   // Location of count within the String object.
1448   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1449 
1450   // Load the count field of the string containing the length and compression flag.
1451   __ movl(string_length, Address(string_obj, count_offset));
1452 
1453   // Do a zero-length check. Even with string compression `count == 0` means empty.
1454   // TODO: Support jecxz.
1455   NearLabel not_found_label;
1456   __ testl(string_length, string_length);
1457   __ j(kEqual, &not_found_label);
1458 
1459   if (mirror::kUseStringCompression) {
1460     // Use TMP to keep string_length_flagged.
1461     __ movl(CpuRegister(TMP), string_length);
1462     // Mask out first bit used as compression flag.
1463     __ shrl(string_length, Immediate(1));
1464   }
1465 
1466   if (start_at_zero) {
1467     // Number of chars to scan is the same as the string length.
1468     __ movl(counter, string_length);
1469     // Move to the start of the string.
1470     __ addq(string_obj, Immediate(value_offset));
1471   } else {
1472     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1473 
1474     // Do a start_index check.
1475     __ cmpl(start_index, string_length);
1476     __ j(kGreaterEqual, &not_found_label);
1477 
1478     // Ensure we have a start index >= 0.
1479     __ xorl(counter, counter);
1480     __ cmpl(start_index, Immediate(0));
1481     __ cmov(kGreater, counter, start_index, /* is64bit= */ false);  // 32-bit copy is enough.
1482 
1483     if (mirror::kUseStringCompression) {
1484       NearLabel modify_counter, offset_uncompressed_label;
1485       __ testl(CpuRegister(TMP), Immediate(1));
1486       __ j(kNotZero, &offset_uncompressed_label);
1487       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1488       __ jmp(&modify_counter);
1489       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1490       __ Bind(&offset_uncompressed_label);
1491       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1492       __ Bind(&modify_counter);
1493     } else {
1494       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1495     }
1496     // Now update RCX, the work counter: it will be string.length - start_index.
1497     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1498     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1499   }
1500 
1501   if (mirror::kUseStringCompression) {
1502     NearLabel uncompressed_string_comparison;
1503     NearLabel comparison_done;
1504     __ testl(CpuRegister(TMP), Immediate(1));
1505     __ j(kNotZero, &uncompressed_string_comparison);
1506     // Check if RAX (search_value) is ASCII.
1507     __ cmpl(search_value, Immediate(127));
1508     __ j(kGreater, &not_found_label);
1509     // Comparing byte-per-byte.
1510     __ repne_scasb();
1511     __ jmp(&comparison_done);
1512     // Everything is set up for repne scasw:
1513     //   * Comparison address in RDI.
1514     //   * Counter in ECX.
1515     __ Bind(&uncompressed_string_comparison);
1516     __ repne_scasw();
1517     __ Bind(&comparison_done);
1518   } else {
1519     __ repne_scasw();
1520   }
1521   // Did we find a match?
1522   __ j(kNotEqual, &not_found_label);
1523 
1524   // Yes, we matched.  Compute the index of the result.
1525   __ subl(string_length, counter);
1526   __ leal(out, Address(string_length, -1));
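  // Worked example (illustrative): searching an uncompressed string of length 10 with the
  // match at index 3, repne scasw consumes 4 units and leaves counter == 6;
  // string_length - counter == 4, and subtracting 1 for the unit that matched gives out == 3.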
1527 
1528   NearLabel done;
1529   __ jmp(&done);
1530 
1531   // Failed to match; return -1.
1532   __ Bind(&not_found_label);
1533   __ movl(out, Immediate(-1));
1534 
1535   // And join up at the end.
1536   __ Bind(&done);
1537   if (slow_path != nullptr) {
1538     __ Bind(slow_path->GetExitLabel());
1539   }
1540 }
1541 
1542 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1543   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1544 }
1545 
1546 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1547   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1548 }
1549 
1550 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1551   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1552 }
1553 
1554 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1555   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1556 }
1557 
1558 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1559   LocationSummary* locations = new (allocator_) LocationSummary(
1560       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1561   InvokeRuntimeCallingConvention calling_convention;
1562   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1563   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1564   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1565   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1566   locations->SetOut(Location::RegisterLocation(RAX));
1567 }
1568 
1569 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1570   X86_64Assembler* assembler = GetAssembler();
1571   LocationSummary* locations = invoke->GetLocations();
1572 
1573   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1574   __ testl(byte_array, byte_array);
1575   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1576   codegen_->AddSlowPath(slow_path);
1577   __ j(kEqual, slow_path->GetEntryLabel());
1578 
1579   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1580   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1581   __ Bind(slow_path->GetExitLabel());
1582 }
1583 
1584 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1585   LocationSummary* locations =
1586       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1587   InvokeRuntimeCallingConvention calling_convention;
1588   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1589   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1590   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1591   locations->SetOut(Location::RegisterLocation(RAX));
1592 }
1593 
1594 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1595   // No need to emit code checking whether `locations->InAt(2)` is a null
1596   // pointer, as callers of the native method
1597   //
1598   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1599   //
1600   // all include a null check on `data` before calling that method.
1601   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1602   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1603 }
1604 
1605 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1606   LocationSummary* locations = new (allocator_) LocationSummary(
1607       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1608   InvokeRuntimeCallingConvention calling_convention;
1609   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1610   locations->SetOut(Location::RegisterLocation(RAX));
1611 }
1612 
1613 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1614   X86_64Assembler* assembler = GetAssembler();
1615   LocationSummary* locations = invoke->GetLocations();
1616 
1617   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1618   __ testl(string_to_copy, string_to_copy);
1619   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1620   codegen_->AddSlowPath(slow_path);
1621   __ j(kEqual, slow_path->GetEntryLabel());
1622 
1623   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1624   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1625   __ Bind(slow_path->GetExitLabel());
1626 }
1627 
1628 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1629   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1630   LocationSummary* locations =
1631       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1632   locations->SetInAt(0, Location::RequiresRegister());
1633   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1634   locations->SetInAt(2, Location::RequiresRegister());
1635   locations->SetInAt(3, Location::RequiresRegister());
1636   locations->SetInAt(4, Location::RequiresRegister());
1637 
1638   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1639   locations->AddTemp(Location::RegisterLocation(RSI));
1640   locations->AddTemp(Location::RegisterLocation(RDI));
1641   locations->AddTemp(Location::RegisterLocation(RCX));
1642 }
1643 
1644 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1645   X86_64Assembler* assembler = GetAssembler();
1646   LocationSummary* locations = invoke->GetLocations();
1647 
1648   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1649   // Location of data in char array buffer.
1650   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1651   // Location of char array data in string.
1652   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1653 
1654   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1655   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1656   Location srcBegin = locations->InAt(1);
1657   int srcBegin_value =
1658     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1659   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1660   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1661   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1662 
1663   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1664   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1665   DCHECK_EQ(char_size, 2u);
1666 
1667   NearLabel done;
1668   // Compute the number of chars (words) to move.
1669   __ movl(CpuRegister(RCX), srcEnd);
1670   if (srcBegin.IsConstant()) {
1671     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1672   } else {
1673     DCHECK(srcBegin.IsRegister());
1674     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1675   }
1676   if (mirror::kUseStringCompression) {
1677     NearLabel copy_uncompressed, copy_loop;
1678     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1679     DCHECK_EQ(c_char_size, 1u);
1680     // Location of count in string.
1681     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1682 
1683     __ testl(Address(obj, count_offset), Immediate(1));
1684     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1685                   "Expecting 0=compressed, 1=uncompressed");
1686     __ j(kNotZero, &copy_uncompressed);
1687     // Compute the address of the source string by adding the number of chars from
1688     // the source beginning to the value offset of a string.
1689     __ leaq(CpuRegister(RSI),
1690             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1691     // Start the loop to copy String's value to Array of Char.
1692     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1693 
1694     __ Bind(&copy_loop);
1695     __ jrcxz(&done);
1696     // Use TMP as temporary (convert byte from RSI to word).
1697     // TODO: Selecting RAX as the temporary and using LODSB/STOSW.
1698     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1699     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1700     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1701     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1702     // TODO: Add support for LOOP to X86_64Assembler.
1703     __ subl(CpuRegister(RCX), Immediate(1));
1704     __ jmp(&copy_loop);
1705 
1706     __ Bind(&copy_uncompressed);
1707   }
1708 
1709   __ leaq(CpuRegister(RSI),
1710           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1711   // Compute the address of the destination buffer.
1712   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1713   // Do the move.
1714   __ rep_movsw();
1715 
1716   __ Bind(&done);
1717 }
1718 
1719 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1720   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1721   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1722   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1723   // to avoid a SIGBUS.
1724   switch (size) {
1725     case DataType::Type::kInt8:
1726       __ movsxb(out, Address(address, 0));
1727       break;
1728     case DataType::Type::kInt16:
1729       __ movsxw(out, Address(address, 0));
1730       break;
1731     case DataType::Type::kInt32:
1732       __ movl(out, Address(address, 0));
1733       break;
1734     case DataType::Type::kInt64:
1735       __ movq(out, Address(address, 0));
1736       break;
1737     default:
1738       LOG(FATAL) << "Type not recognized for peek: " << size;
1739       UNREACHABLE();
1740   }
1741 }
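// Added example (not in the original source): the sub-word peeks above sign-extend, matching
// the signed Java return types of the Memory peek intrinsics. A byte 0xFF loaded with movsxb
// becomes the int value -1 and a half-word 0x8000 loaded with movsxw becomes -32768, while
// peekInt/peekLong copy the value verbatim with movl/movq.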
1742 
1743 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1744   CreateIntToIntLocations(allocator_, invoke);
1745 }
1746 
1747 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1748   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1749 }
1750 
1751 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1752   CreateIntToIntLocations(allocator_, invoke);
1753 }
1754 
1755 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1756   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1757 }
1758 
1759 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1760   CreateIntToIntLocations(allocator_, invoke);
1761 }
1762 
1763 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1764   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1765 }
1766 
1767 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1768   CreateIntToIntLocations(allocator_, invoke);
1769 }
1770 
1771 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1772   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1773 }
1774 
1775 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1776   LocationSummary* locations =
1777       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1778   locations->SetInAt(0, Location::RequiresRegister());
1779   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1780 }
1781 
1782 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1783   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1784   Location value = locations->InAt(1);
1785   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1786   // to avoid a SIGBUS.
1787   switch (size) {
1788     case DataType::Type::kInt8:
1789       if (value.IsConstant()) {
1790         __ movb(Address(address, 0),
1791                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1792       } else {
1793         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1794       }
1795       break;
1796     case DataType::Type::kInt16:
1797       if (value.IsConstant()) {
1798         __ movw(Address(address, 0),
1799                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1800       } else {
1801         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1802       }
1803       break;
1804     case DataType::Type::kInt32:
1805       if (value.IsConstant()) {
1806         __ movl(Address(address, 0),
1807                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1808       } else {
1809         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1810       }
1811       break;
1812     case DataType::Type::kInt64:
1813       if (value.IsConstant()) {
1814         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1815         DCHECK(IsInt<32>(v));
1816         int32_t v_32 = v;
1817         __ movq(Address(address, 0), Immediate(v_32));
1818       } else {
1819         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1820       }
1821       break;
1822     default:
1823       LOG(FATAL) << "Type not recognized for poke: " << size;
1824       UNREACHABLE();
1825   }
1826 }
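// Added note: in the kInt64 constant case above, `movq [mem], imm` only takes a 32-bit
// immediate that the CPU sign-extends to 64 bits, hence the DCHECK(IsInt<32>(v)). A constant
// such as 0x100000000 would have to go through a register instead; CreateIntIntToVoidLocations
// guards this by requesting RegisterOrInt32Constant for the value input.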
1827 
1828 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1829   CreateIntIntToVoidLocations(allocator_, invoke);
1830 }
1831 
1832 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1833   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1834 }
1835 
1836 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1837   CreateIntIntToVoidLocations(allocator_, invoke);
1838 }
1839 
1840 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1841   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1842 }
1843 
1844 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1845   CreateIntIntToVoidLocations(allocator_, invoke);
1846 }
1847 
1848 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1849   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1850 }
1851 
1852 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1853   CreateIntIntToVoidLocations(allocator_, invoke);
1854 }
1855 
1856 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1857   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1858 }
1859 
1860 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1861   LocationSummary* locations =
1862       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1863   locations->SetOut(Location::RequiresRegister());
1864 }
1865 
1866 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1867   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1868   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1869                                                     /* no_rip= */ true));
1870 }
1871 
1872 static void GenUnsafeGet(HInvoke* invoke,
1873                          DataType::Type type,
1874                          bool is_volatile ATTRIBUTE_UNUSED,
1875                          CodeGeneratorX86_64* codegen) {
1876   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1877   LocationSummary* locations = invoke->GetLocations();
1878   Location base_loc = locations->InAt(1);
1879   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1880   Location offset_loc = locations->InAt(2);
1881   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1882   Location output_loc = locations->Out();
1883   CpuRegister output = output_loc.AsRegister<CpuRegister>();
1884 
1885   switch (type) {
1886     case DataType::Type::kInt32:
1887       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1888       break;
1889 
1890     case DataType::Type::kReference: {
1891       if (gUseReadBarrier) {
1892         if (kUseBakerReadBarrier) {
1893           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1894           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1895               invoke, output_loc, base, src, /* needs_null_check= */ false);
1896         } else {
1897           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1898           codegen->GenerateReadBarrierSlow(
1899               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1900         }
1901       } else {
1902         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1903         __ MaybeUnpoisonHeapReference(output);
1904       }
1905       break;
1906     }
1907 
1908     case DataType::Type::kInt64:
1909       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1910       break;
1911 
1912     default:
1913       LOG(FATAL) << "Unsupported op size " << type;
1914       UNREACHABLE();
1915   }
1916 }
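// Added sketch (illustrative): a call such as unsafe.getInt(obj, offset) compiled through
// this intrinsic becomes a single load of the form
//   movl out, [base + offset]
// where `base` is the object reference and `offset` the raw value previously obtained from
// objectFieldOffset()/arrayBaseOffset(). The reference case additionally unpoisons the result
// or routes the load through the Baker read barrier, as the switch above shows.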
1917 
1918 static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
1919   switch (intrinsic) {
1920     case Intrinsics::kUnsafeGetObject:
1921     case Intrinsics::kUnsafeGetObjectVolatile:
1922     case Intrinsics::kJdkUnsafeGetObject:
1923     case Intrinsics::kJdkUnsafeGetObjectVolatile:
1924     case Intrinsics::kJdkUnsafeGetObjectAcquire:
1925       return true;
1926     default:
1927       break;
1928   }
1929   return false;
1930 }
1931 
1932 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1933   bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
1934   LocationSummary* locations =
1935       new (allocator) LocationSummary(invoke,
1936                                       can_call
1937                                           ? LocationSummary::kCallOnSlowPath
1938                                           : LocationSummary::kNoCall,
1939                                       kIntrinsified);
1940   if (can_call && kUseBakerReadBarrier) {
1941     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1942   }
1943   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1944   locations->SetInAt(1, Location::RequiresRegister());
1945   locations->SetInAt(2, Location::RequiresRegister());
1946   locations->SetOut(Location::RequiresRegister(),
1947                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1948 }
1949 
1950 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1951   VisitJdkUnsafeGet(invoke);
1952 }
1953 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1954   VisitJdkUnsafeGetVolatile(invoke);
1955 }
1956 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1957   VisitJdkUnsafeGetLong(invoke);
1958 }
1959 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1960   VisitJdkUnsafeGetLongVolatile(invoke);
1961 }
1962 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1963   VisitJdkUnsafeGetObject(invoke);
1964 }
1965 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1966   VisitJdkUnsafeGetObjectVolatile(invoke);
1967 }
1968 
1969 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1970   CreateIntIntIntToIntLocations(allocator_, invoke);
1971 }
1972 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1973   CreateIntIntIntToIntLocations(allocator_, invoke);
1974 }
1975 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1976   CreateIntIntIntToIntLocations(allocator_, invoke);
1977 }
1978 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1979   CreateIntIntIntToIntLocations(allocator_, invoke);
1980 }
1981 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1982   CreateIntIntIntToIntLocations(allocator_, invoke);
1983 }
1984 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1985   CreateIntIntIntToIntLocations(allocator_, invoke);
1986 }
1987 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1988   CreateIntIntIntToIntLocations(allocator_, invoke);
1989 }
1990 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1991   CreateIntIntIntToIntLocations(allocator_, invoke);
1992 }
1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1994   CreateIntIntIntToIntLocations(allocator_, invoke);
1995 }
1996 
1997 
1998 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1999   VisitJdkUnsafeGet(invoke);
2000 }
2001 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2002   VisitJdkUnsafeGetVolatile(invoke);
2003 }
2004 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2005   VisitJdkUnsafeGetLong(invoke);
2006 }
2007 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2008   VisitJdkUnsafeGetLongVolatile(invoke);
2009 }
2010 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2011   VisitJdkUnsafeGetObject(invoke);
2012 }
2013 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2014   VisitJdkUnsafeGetObjectVolatile(invoke);
2015 }
2016 
2017 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2018   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2019 }
2020 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2021   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2022 }
2023 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2024   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2025 }
2026 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2027   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2028 }
2029 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2030   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2031 }
2032 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2033   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2034 }
2035 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
2036   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2037 }
2038 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
2039   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2040 }
2041 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
2042   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2043 }
2044 
2045 
2046 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2047                                                        DataType::Type type,
2048                                                        HInvoke* invoke) {
2049   LocationSummary* locations =
2050       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2051   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2052   locations->SetInAt(1, Location::RequiresRegister());
2053   locations->SetInAt(2, Location::RequiresRegister());
2054   locations->SetInAt(3, Location::RequiresRegister());
2055   if (type == DataType::Type::kReference) {
2056     // Need temp registers for card-marking.
2057     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2058     locations->AddTemp(Location::RequiresRegister());
2059   }
2060 }
2061 
2062 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2063   VisitJdkUnsafePut(invoke);
2064 }
2065 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2066   VisitJdkUnsafePutOrdered(invoke);
2067 }
2068 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2069   VisitJdkUnsafePutVolatile(invoke);
2070 }
2071 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2072   VisitJdkUnsafePutObject(invoke);
2073 }
2074 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2075   VisitJdkUnsafePutObjectOrdered(invoke);
2076 }
2077 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2078   VisitJdkUnsafePutObjectVolatile(invoke);
2079 }
2080 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2081   VisitJdkUnsafePutLong(invoke);
2082 }
2083 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2084   VisitJdkUnsafePutLongOrdered(invoke);
2085 }
2086 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2087   VisitJdkUnsafePutLongVolatile(invoke);
2088 }
2089 
2090 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2091   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2092 }
2093 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2094   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2095 }
2096 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2097   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2098 }
2099 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2100   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2101 }
2102 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
2103   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2104 }
2105 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2106   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2107 }
2108 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2109   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2110 }
2111 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2112   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2113 }
2114 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2115   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2116 }
2117 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2118   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2119 }
2120 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2121   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2122 }
2123 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2124   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2125 }
2126 
2127 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2128 // memory model.
2129 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2130                          CodeGeneratorX86_64* codegen) {
2131   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2132   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2133   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2134   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2135 
2136   if (type == DataType::Type::kInt64) {
2137     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2138   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2139     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2140     __ movl(temp, value);
2141     __ PoisonHeapReference(temp);
2142     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2143   } else {
2144     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2145   }
2146 
2147   if (is_volatile) {
2148     codegen->MemoryFence();
2149   }
2150 
2151   if (type == DataType::Type::kReference) {
2152     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2153     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2154                         locations->GetTemp(1).AsRegister<CpuRegister>(),
2155                         base,
2156                         value,
2157                         value_can_be_null);
2158   }
2159 }
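// Added summary (relying on the usual x86-64 TSO reasoning): stores are already ordered with
// respect to earlier loads and stores, so the plain and ordered puts above emit no fence at
// all, while the volatile variants (and the release variants, which this file routes through
// the same is_volatile path) call codegen->MemoryFence() to obtain the StoreLoad ordering the
// hardware does not give by default.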
2160 
2161 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2162   VisitJdkUnsafePut(invoke);
2163 }
2164 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2165   VisitJdkUnsafePutOrdered(invoke);
2166 }
2167 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2168   VisitJdkUnsafePutVolatile(invoke);
2169 }
2170 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2171   VisitJdkUnsafePutObject(invoke);
2172 }
2173 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2174   VisitJdkUnsafePutObjectOrdered(invoke);
2175 }
2176 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2177   VisitJdkUnsafePutObjectVolatile(invoke);
2178 }
2179 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2180   VisitJdkUnsafePutLong(invoke);
2181 }
2182 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2183   VisitJdkUnsafePutLongOrdered(invoke);
2184 }
2185 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2186   VisitJdkUnsafePutLongVolatile(invoke);
2187 }
2188 
2189 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2190   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2191 }
2192 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2193   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2194 }
2195 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2196   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2197 }
2198 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2199   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2200 }
2201 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
2202   GenUnsafePut(
2203       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2204 }
2205 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2206   GenUnsafePut(
2207       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2208 }
2209 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2210   GenUnsafePut(
2211       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2212 }
2213 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2214   GenUnsafePut(
2215       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2216 }
2217 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2218   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2219 }
2220 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2221   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2222 }
2223 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2224   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2225 }
2226 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2227   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2228 }
2229 
2230 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2231                                      DataType::Type type,
2232                                      HInvoke* invoke) {
2233   const bool can_call = gUseReadBarrier &&
2234                         kUseBakerReadBarrier &&
2235                         IsUnsafeCASObject(invoke);
2236   LocationSummary* locations =
2237       new (allocator) LocationSummary(invoke,
2238                                       can_call
2239                                           ? LocationSummary::kCallOnSlowPath
2240                                           : LocationSummary::kNoCall,
2241                                       kIntrinsified);
2242   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2243   locations->SetInAt(1, Location::RequiresRegister());
2244   locations->SetInAt(2, Location::RequiresRegister());
2245   // The expected value must be in EAX/RAX, as required by CMPXCHG.
2246   locations->SetInAt(3, Location::RegisterLocation(RAX));
2247   locations->SetInAt(4, Location::RequiresRegister());
2248 
2249   // RAX is clobbered by CMPXCHG, but since we use it as the output there is no need to add it as a temporary.
2250   locations->SetOut(Location::RegisterLocation(RAX));
2251 
2252   if (type == DataType::Type::kReference) {
2253     // Need two temporaries for MarkGCCard.
2254     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2255     locations->AddTemp(Location::RequiresRegister());
2256     if (gUseReadBarrier) {
2257       // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2258       DCHECK(kUseBakerReadBarrier);
2259       locations->AddTemp(Location::RequiresRegister());
2260     }
2261   }
2262 }
2263 
VisitUnsafeCASInt(HInvoke * invoke)2264 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2265   VisitJdkUnsafeCASInt(invoke);
2266 }
2267 
VisitUnsafeCASLong(HInvoke * invoke)2268 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2269   VisitJdkUnsafeCASLong(invoke);
2270 }
2271 
VisitUnsafeCASObject(HInvoke * invoke)2272 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2273   VisitJdkUnsafeCASObject(invoke);
2274 }
2275 
VisitJdkUnsafeCASInt(HInvoke * invoke)2276 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2277   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2278   VisitJdkUnsafeCompareAndSetInt(invoke);
2279 }
2280 
VisitJdkUnsafeCASLong(HInvoke * invoke)2281 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2282   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2283   VisitJdkUnsafeCompareAndSetLong(invoke);
2284 }
2285 
VisitJdkUnsafeCASObject(HInvoke * invoke)2286 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2287   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2288   VisitJdkUnsafeCompareAndSetObject(invoke);
2289 }
2290 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2291 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2292   CreateUnsafeCASLocations(allocator_, DataType::Type::kInt32, invoke);
2293 }
2294 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2295 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2296   CreateUnsafeCASLocations(allocator_, DataType::Type::kInt64, invoke);
2297 }
2298 
VisitJdkUnsafeCompareAndSetObject(HInvoke * invoke)2299 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2300   // The only supported read barrier implementation is the Baker-style read barriers.
2301   if (gUseReadBarrier && !kUseBakerReadBarrier) {
2302     return;
2303   }
2304 
2305   CreateUnsafeCASLocations(allocator_, DataType::Type::kReference, invoke);
2306 }
2307 
2308 // Convert ZF into the Boolean result.
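// CMPXCHG sets ZF when the value read from memory equals the expected value in RAX
// (i.e. the swap took place), so SETZ followed by MOVZX yields 1 on success and 0 on failure.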
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2309 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2310   __ setcc(kZero, out);
2311   __ movzxb(out, out);
2312 }
2313 
2314 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2315 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2316                                           DataType::Type type,
2317                                           Address field_addr,
2318                                           Location value,
2319                                           bool is_cmpxchg,
2320                                           bool byte_swap) {
2321   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2322   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2323 
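  // For byte-swapped accesses (used by the VarHandle byte-array view code paths below), both the
  // expected value in RAX and the new value must be converted to the in-memory byte order.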
2324   if (byte_swap) {
2325     instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2326     instr_codegen->Bswap(value, type);
2327   }
2328 
2329   switch (type) {
2330     case DataType::Type::kBool:
2331     case DataType::Type::kInt8:
2332       __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2333       break;
2334     case DataType::Type::kInt16:
2335     case DataType::Type::kUint16:
2336       __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2337       break;
2338     case DataType::Type::kInt32:
2339     case DataType::Type::kUint32:
2340       __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2341       break;
2342     case DataType::Type::kInt64:
2343     case DataType::Type::kUint64:
2344       __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2345       break;
2346     default:
2347       LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2348   }
2349   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2350 
2351   if (byte_swap) {
2352     // Restore byte order for value.
2353     instr_codegen->Bswap(value, type);
2354   }
2355 
2356   CpuRegister rax(RAX);
2357   if (is_cmpxchg) {
2358     if (byte_swap) {
2359       instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2360     }
2361     // Sign-extend or zero-extend the result as necessary.
2362     switch (type) {
2363       case DataType::Type::kBool:
2364         __ movzxb(rax, rax);
2365         break;
2366       case DataType::Type::kInt8:
2367         __ movsxb(rax, rax);
2368         break;
2369       case DataType::Type::kInt16:
2370         __ movsxw(rax, rax);
2371         break;
2372       case DataType::Type::kUint16:
2373         __ movzxw(rax, rax);
2374         break;
2375       default:
2376         break;  // No need to do anything.
2377     }
2378   } else {
2379     GenZFlagToResult(assembler, rax);
2380   }
2381 }
2382 
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2383 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2384                                          Address field_addr,
2385                                          CpuRegister temp,
2386                                          Location value,
2387                                          Location expected,
2388                                          Location out,
2389                                          bool is64bit,
2390                                          bool is_cmpxchg,
2391                                          bool byte_swap) {
2392   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2393   InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2394 
2395   Location rax_loc = Location::RegisterLocation(RAX);
2396   Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2397 
2398   DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2399 
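  // The floating-point CAS operates on the raw bits in general-purpose registers, since CMPXCHG
  // only takes integer operands; for compare-and-exchange the old value is moved back to an XMM
  // register below.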
2400   // Copy `expected` to RAX (required by the CMPXCHG instruction).
2401   codegen->Move(rax_loc, expected);
2402 
2403   // Copy value to some other register (ensure it's not RAX).
2404   DCHECK_NE(temp.AsRegister(), RAX);
2405   codegen->Move(temp_loc, value);
2406 
2407   if (byte_swap) {
2408     instr_codegen->Bswap(rax_loc, type);
2409     instr_codegen->Bswap(temp_loc, type);
2410   }
2411 
2412   if (is64bit) {
2413     __ LockCmpxchgq(field_addr, temp);
2414   } else {
2415     __ LockCmpxchgl(field_addr, temp);
2416   }
2417   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2418   // No need to restore byte order for temporary register.
2419 
2420   if (is_cmpxchg) {
2421     if (byte_swap) {
2422       instr_codegen->Bswap(rax_loc, type);
2423     }
2424     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2425   } else {
2426     GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2427   }
2428 }
2429 
2430 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64 * codegen,HInvoke * invoke,CpuRegister base,CpuRegister offset,CpuRegister value,CpuRegister temp1,CpuRegister temp2,CpuRegister temp3,bool is_cmpxchg)2431 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2432                                           HInvoke* invoke,
2433                                           CpuRegister base,
2434                                           CpuRegister offset,
2435                                           CpuRegister value,
2436                                           CpuRegister temp1,
2437                                           CpuRegister temp2,
2438                                           CpuRegister temp3,
2439                                           bool is_cmpxchg) {
2440   // The only supported read barrier implementation is the Baker-style read barriers.
2441   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2442 
2443   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2444 
2445   // Mark card for object assuming new value is stored.
2446   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2447   codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2448 
2449   Address field_addr(base, offset, TIMES_1, 0);
2450   if (gUseReadBarrier && kUseBakerReadBarrier) {
2451     // Need to make sure the reference stored in the field is a to-space
2452     // one before attempting the CAS or the CAS could fail incorrectly.
2453     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2454         invoke,
2455         Location::RegisterLocation(temp3.AsRegister()),
2456         base,
2457         field_addr,
2458         /* needs_null_check= */ false,
2459         /* always_update_field= */ true,
2460         &temp1,
2461         &temp2);
2462   } else {
2463     // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2464   }
2465 
2466   bool base_equals_value = (base.AsRegister() == value.AsRegister());
2467   Register value_reg = value.AsRegister();
2468   if (kPoisonHeapReferences) {
2469     if (base_equals_value) {
2470       // If `base` and `value` are the same register location, move `value_reg` to a temporary
2471       // register.  This way, poisoning `value_reg` won't invalidate `base`.
2472       value_reg = temp1.AsRegister();
2473       __ movl(CpuRegister(value_reg), base);
2474     }
2475 
2476     // Check that the register allocator did not assign the location of expected value (RAX) to
2477     // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2478     // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2479     //   not be poisoned at all, as heap poisoning uses address negation.
2480     // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2481     DCHECK_NE(RAX, value_reg);
2482     DCHECK_NE(RAX, base.AsRegister());
2483 
2484     __ PoisonHeapReference(CpuRegister(RAX));
2485     __ PoisonHeapReference(CpuRegister(value_reg));
2486   }
2487 
2488   __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2489   // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2490 
2491   if (is_cmpxchg) {
2492     // The old value is already in RAX where CMPXCHG left it; just unpoison it if needed.
2493     __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2494   } else {
2495     GenZFlagToResult(assembler, CpuRegister(RAX));
2496   }
2497 
2498   // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2499   if (kPoisonHeapReferences) {
2500     if (base_equals_value) {
2501       // `value_reg` has been moved to a temporary register, no need to unpoison it.
2502     } else {
2503       // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2504       DCHECK_NE(RAX, value_reg);
2505       __ UnpoisonHeapReference(CpuRegister(value_reg));
2506     }
2507   }
2508 }
2509 
2510 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2511 // and always return true.
RegsAreAllDifferent(const std::vector<CpuRegister> & regs)2512 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2513   if (kIsDebugBuild) {
2514     for (size_t i = 0; i < regs.size(); ++i) {
2515       for (size_t j = 0; j < i; ++j) {
2516         if (regs[i].AsRegister() == regs[j].AsRegister()) {
2517           return false;
2518         }
2519       }
2520     }
2521   }
2522   return true;
2523 }
2524 
2525 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2526 // temporary indices that may not correspond to real registers for code paths that do not use them.
GenCompareAndSetOrExchange(CodeGeneratorX86_64 * codegen,HInvoke * invoke,DataType::Type type,CpuRegister base,CpuRegister offset,uint32_t temp1_index,uint32_t temp2_index,uint32_t temp3_index,Location new_value,Location expected,Location out,bool is_cmpxchg,bool byte_swap)2527 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2528                                        HInvoke* invoke,
2529                                        DataType::Type type,
2530                                        CpuRegister base,
2531                                        CpuRegister offset,
2532                                        uint32_t temp1_index,
2533                                        uint32_t temp2_index,
2534                                        uint32_t temp3_index,
2535                                        Location new_value,
2536                                        Location expected,
2537                                        Location out,
2538                                        bool is_cmpxchg,
2539                                        bool byte_swap) {
2540   LocationSummary* locations = invoke->GetLocations();
2541   Address field_address(base, offset, TIMES_1, 0);
2542 
2543   if (DataType::IsFloatingPointType(type)) {
2544     bool is64bit = (type == DataType::Type::kFloat64);
2545     CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2546     DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2547 
2548     GenCompareAndSetOrExchangeFP(
2549         codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2550   } else {
2551     // Both the expected value for CMPXCHG and the output are in RAX.
2552     DCHECK_EQ(RAX, expected.AsRegister<Register>());
2553     DCHECK_EQ(RAX, out.AsRegister<Register>());
2554 
2555     if (type == DataType::Type::kReference) {
2556       CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2557       CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2558       CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2559       CpuRegister temp3 = gUseReadBarrier
2560           ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2561           : CpuRegister(kNoRegister);
2562       DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2563 
2564       DCHECK(!byte_swap);
2565       GenCompareAndSetOrExchangeRef(
2566           codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2567     } else {
2568       GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2569     }
2570   }
2571 }
2572 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2573 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2574   LocationSummary* locations = invoke->GetLocations();
2575   GenCompareAndSetOrExchange(codegen,
2576                              invoke,
2577                              type,
2578                              /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2579                              /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2580                              /*temp1_index=*/ 0,
2581                              /*temp2_index=*/ 1,
2582                              /*temp3_index=*/ 2,
2583                              /*new_value=*/ locations->InAt(4),
2584                              /*expected=*/ locations->InAt(3),
2585                              locations->Out(),
2586                              /*is_cmpxchg=*/ false,
2587                              /*byte_swap=*/ false);
2588 }
2589 
VisitUnsafeCASInt(HInvoke * invoke)2590 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2591   VisitJdkUnsafeCASInt(invoke);
2592 }
2593 
VisitUnsafeCASLong(HInvoke * invoke)2594 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2595   VisitJdkUnsafeCASLong(invoke);
2596 }
2597 
VisitUnsafeCASObject(HInvoke * invoke)2598 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2599   VisitJdkUnsafeCASObject(invoke);
2600 }
2601 
VisitJdkUnsafeCASInt(HInvoke * invoke)2602 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2603   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2604   VisitJdkUnsafeCompareAndSetInt(invoke);
2605 }
2606 
VisitJdkUnsafeCASLong(HInvoke * invoke)2607 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2608   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2609   VisitJdkUnsafeCompareAndSetLong(invoke);
2610 }
2611 
VisitJdkUnsafeCASObject(HInvoke * invoke)2612 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2613   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2614   VisitJdkUnsafeCompareAndSetObject(invoke);
2615 }
2616 
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2617 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2618   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2619 }
2620 
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2621 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2622   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2623 }
2624 
VisitJdkUnsafeCompareAndSetObject(HInvoke * invoke)2625 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2626   // The only supported read barrier implementation is the Baker-style read barriers.
2627   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2628 
2629   GenCAS(DataType::Type::kReference, invoke, codegen_);
2630 }
2631 
VisitIntegerReverse(HInvoke * invoke)2632 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2633   LocationSummary* locations =
2634       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2635   locations->SetInAt(0, Location::RequiresRegister());
2636   locations->SetOut(Location::SameAsFirstInput());
2637   locations->AddTemp(Location::RequiresRegister());
2638 }
2639 
SwapBits(CpuRegister reg,CpuRegister temp,int32_t shift,int32_t mask,X86_64Assembler * assembler)2640 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2641                      X86_64Assembler* assembler) {
2642   Immediate imm_shift(shift);
2643   Immediate imm_mask(mask);
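  // Computes reg = ((reg & mask) << shift) | ((reg >> shift) & mask), i.e. swaps each pair of
  // bit groups of width `shift` selected by `mask`.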
2644   __ movl(temp, reg);
2645   __ shrl(reg, imm_shift);
2646   __ andl(temp, imm_mask);
2647   __ andl(reg, imm_mask);
2648   __ shll(temp, imm_shift);
2649   __ orl(reg, temp);
2650 }
2651 
VisitIntegerReverse(HInvoke * invoke)2652 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2653   X86_64Assembler* assembler = GetAssembler();
2654   LocationSummary* locations = invoke->GetLocations();
2655 
2656   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2657   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2658 
2659   /*
2660    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2661    * bit swapping to reverse the bits in a number x. Using bswap saves instructions
2662    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2663    * x = bswap x
2664    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2665    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2666    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2667    */
2668   __ bswapl(reg);
2669   SwapBits(reg, temp, 1, 0x55555555, assembler);
2670   SwapBits(reg, temp, 2, 0x33333333, assembler);
2671   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
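  // For example, an input of 0x00000001 becomes 0x01000000 after the bswap and 0x80000000
  // after the three swap rounds.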
2672 }
2673 
VisitLongReverse(HInvoke * invoke)2674 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2675   LocationSummary* locations =
2676       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2677   locations->SetInAt(0, Location::RequiresRegister());
2678   locations->SetOut(Location::SameAsFirstInput());
2679   locations->AddTemp(Location::RequiresRegister());
2680   locations->AddTemp(Location::RequiresRegister());
2681 }
2682 
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2683 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2684                        int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2685   Immediate imm_shift(shift);
2686   __ movq(temp_mask, Immediate(mask));
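  // The 64-bit mask does not fit in a sign-extended 32-bit immediate for AND, so it is
  // materialized in `temp_mask` first; the rest mirrors the 32-bit SwapBits above.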
2687   __ movq(temp, reg);
2688   __ shrq(reg, imm_shift);
2689   __ andq(temp, temp_mask);
2690   __ andq(reg, temp_mask);
2691   __ shlq(temp, imm_shift);
2692   __ orq(reg, temp);
2693 }
2694 
VisitLongReverse(HInvoke * invoke)2695 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2696   X86_64Assembler* assembler = GetAssembler();
2697   LocationSummary* locations = invoke->GetLocations();
2698 
2699   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2700   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2701   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2702 
2703   /*
2704    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2705    * bit swapping to reverse the bits in a long number x. Using bswap saves instructions
2706    * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2707    * x = bswap x
2708    * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2709    * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2710    * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2711    */
2712   __ bswapq(reg);
2713   SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2714   SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2715   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2716 }
2717 
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86_64 * codegen,HInvoke * invoke)2718 static void CreateBitCountLocations(
2719     ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2720   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2721     // Do nothing if there is no popcnt support. This results in generating
2722     // a call for the intrinsic rather than direct code.
2723     return;
2724   }
2725   LocationSummary* locations =
2726       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2727   locations->SetInAt(0, Location::Any());
2728   locations->SetOut(Location::RequiresRegister());
2729 }
2730 
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2731 static void GenBitCount(X86_64Assembler* assembler,
2732                         CodeGeneratorX86_64* codegen,
2733                         HInvoke* invoke,
2734                         bool is_long) {
2735   LocationSummary* locations = invoke->GetLocations();
2736   Location src = locations->InAt(0);
2737   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2738 
2739   if (invoke->InputAt(0)->IsConstant()) {
2740     // Evaluate this at compile time.
2741     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2742     int32_t result = is_long
2743         ? POPCOUNT(static_cast<uint64_t>(value))
2744         : POPCOUNT(static_cast<uint32_t>(value));
2745     codegen->Load32BitValue(out, result);
2746     return;
2747   }
2748 
2749   if (src.IsRegister()) {
2750     if (is_long) {
2751       __ popcntq(out, src.AsRegister<CpuRegister>());
2752     } else {
2753       __ popcntl(out, src.AsRegister<CpuRegister>());
2754     }
2755   } else if (is_long) {
2756     DCHECK(src.IsDoubleStackSlot());
2757     __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2758   } else {
2759     DCHECK(src.IsStackSlot());
2760     __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2761   }
2762 }
2763 
VisitIntegerBitCount(HInvoke * invoke)2764 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2765   CreateBitCountLocations(allocator_, codegen_, invoke);
2766 }
2767 
VisitIntegerBitCount(HInvoke * invoke)2768 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2769   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2770 }
2771 
VisitLongBitCount(HInvoke * invoke)2772 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2773   CreateBitCountLocations(allocator_, codegen_, invoke);
2774 }
2775 
VisitLongBitCount(HInvoke * invoke)2776 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2777   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2778 }
2779 
CreateOneBitLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_high)2780 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
2781   LocationSummary* locations =
2782       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2783   locations->SetInAt(0, Location::Any());
2784   locations->SetOut(Location::RequiresRegister());
2785   locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
2786                              : Location::RequiresRegister());  // any will do
2787 }
2788 
GenOneBit(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_high,bool is_long)2789 static void GenOneBit(X86_64Assembler* assembler,
2790                       CodeGeneratorX86_64* codegen,
2791                       HInvoke* invoke,
2792                       bool is_high, bool is_long) {
2793   LocationSummary* locations = invoke->GetLocations();
2794   Location src = locations->InAt(0);
2795   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2796 
2797   if (invoke->InputAt(0)->IsConstant()) {
2798     // Evaluate this at compile time.
2799     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2800     if (value == 0) {
2801       __ xorl(out, out);  // Clears upper bits too.
2802       return;
2803     }
2804     // Nonzero value.
2805     if (is_high) {
2806       value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2807                       : 31 - CLZ(static_cast<uint32_t>(value));
2808     } else {
2809       value = is_long ? CTZ(static_cast<uint64_t>(value))
2810                       : CTZ(static_cast<uint32_t>(value));
2811     }
2812     if (is_long) {
2813       codegen->Load64BitValue(out, 1ULL << value);
2814     } else {
2815       codegen->Load32BitValue(out, 1 << value);
2816     }
2817     return;
2818   }
2819 
2820   // Handle the non-constant cases.
2821   if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
2822       src.IsRegister()) {
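      // BLSI computes out = src & -src in one instruction, isolating the lowest set bit.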
2823       __ blsi(out, src.AsRegister<CpuRegister>());
2824   } else {
2825     CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2826     if (is_high) {
2827       // Use architectural support: basically 1 << bsr.
2828       if (src.IsRegister()) {
2829         if (is_long) {
2830           __ bsrq(tmp, src.AsRegister<CpuRegister>());
2831         } else {
2832           __ bsrl(tmp, src.AsRegister<CpuRegister>());
2833         }
2834       } else if (is_long) {
2835         DCHECK(src.IsDoubleStackSlot());
2836         __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2837       } else {
2838         DCHECK(src.IsStackSlot());
2839         __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2840       }
2841       // BSR sets ZF if the input was zero.
2842       NearLabel is_zero, done;
2843       __ j(kEqual, &is_zero);
2844       __ movl(out, Immediate(1));  // Clears upper bits too.
2845       if (is_long) {
2846         __ shlq(out, tmp);
2847       } else {
2848         __ shll(out, tmp);
2849       }
2850       __ jmp(&done);
2851       __ Bind(&is_zero);
2852       __ xorl(out, out);  // Clears upper bits too.
2853       __ Bind(&done);
2854     } else {
2855       // Copy input into temporary.
2856       if (src.IsRegister()) {
2857         if (is_long) {
2858           __ movq(tmp, src.AsRegister<CpuRegister>());
2859         } else {
2860           __ movl(tmp, src.AsRegister<CpuRegister>());
2861         }
2862       } else if (is_long) {
2863         DCHECK(src.IsDoubleStackSlot());
2864         __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2865       } else {
2866         DCHECK(src.IsStackSlot());
2867         __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2868       }
2869       // Do the bit twiddling: basically tmp & -tmp;
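      // This isolates the lowest set bit, e.g. 0b0110100 & -0b0110100 = 0b0000100.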
2870       if (is_long) {
2871         __ movq(out, tmp);
2872         __ negq(tmp);
2873         __ andq(out, tmp);
2874       } else {
2875         __ movl(out, tmp);
2876         __ negl(tmp);
2877         __ andl(out, tmp);
2878       }
2879     }
2880   }
2881 }
2882 
VisitIntegerHighestOneBit(HInvoke * invoke)2883 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2884   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2885 }
2886 
VisitIntegerHighestOneBit(HInvoke * invoke)2887 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2888   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
2889 }
2890 
VisitLongHighestOneBit(HInvoke * invoke)2891 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2892   CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2893 }
2894 
VisitLongHighestOneBit(HInvoke * invoke)2895 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2896   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
2897 }
2898 
VisitIntegerLowestOneBit(HInvoke * invoke)2899 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2900   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2901 }
2902 
VisitIntegerLowestOneBit(HInvoke * invoke)2903 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2904   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
2905 }
2906 
VisitLongLowestOneBit(HInvoke * invoke)2907 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2908   CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2909 }
2910 
VisitLongLowestOneBit(HInvoke * invoke)2911 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2912   GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
2913 }
2914 
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)2915 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2916   LocationSummary* locations =
2917       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2918   locations->SetInAt(0, Location::Any());
2919   locations->SetOut(Location::RequiresRegister());
2920 }
2921 
GenLeadingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2922 static void GenLeadingZeros(X86_64Assembler* assembler,
2923                             CodeGeneratorX86_64* codegen,
2924                             HInvoke* invoke, bool is_long) {
2925   LocationSummary* locations = invoke->GetLocations();
2926   Location src = locations->InAt(0);
2927   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2928 
2929   int zero_value_result = is_long ? 64 : 32;
2930   if (invoke->InputAt(0)->IsConstant()) {
2931     // Evaluate this at compile time.
2932     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2933     if (value == 0) {
2934       value = zero_value_result;
2935     } else {
2936       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2937     }
2938     codegen->Load32BitValue(out, value);
2939     return;
2940   }
2941 
2942   // Handle the non-constant cases.
2943   if (src.IsRegister()) {
2944     if (is_long) {
2945       __ bsrq(out, src.AsRegister<CpuRegister>());
2946     } else {
2947       __ bsrl(out, src.AsRegister<CpuRegister>());
2948     }
2949   } else if (is_long) {
2950     DCHECK(src.IsDoubleStackSlot());
2951     __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2952   } else {
2953     DCHECK(src.IsStackSlot());
2954     __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2955   }
2956 
2957   // BSR sets ZF if the input was zero, and the output is undefined.
2958   NearLabel is_zero, done;
2959   __ j(kEqual, &is_zero);
2960 
2961   // Correct the result from BSR to get the CLZ result.
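  // BSR returns the index of the most significant set bit, so CLZ = (width - 1) - index.
  // Since 0 <= index <= width - 1, the subtraction is equivalent to XOR with (width - 1).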
2962   __ xorl(out, Immediate(zero_value_result - 1));
2963   __ jmp(&done);
2964 
2965   // Fix the zero case with the expected result.
2966   __ Bind(&is_zero);
2967   __ movl(out, Immediate(zero_value_result));
2968 
2969   __ Bind(&done);
2970 }
2971 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2972 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2973   CreateLeadingZeroLocations(allocator_, invoke);
2974 }
2975 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2976 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2977   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2978 }
2979 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2980 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2981   CreateLeadingZeroLocations(allocator_, invoke);
2982 }
2983 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2984 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2985   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2986 }
2987 
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)2988 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2989   LocationSummary* locations =
2990       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2991   locations->SetInAt(0, Location::Any());
2992   locations->SetOut(Location::RequiresRegister());
2993 }
2994 
GenTrailingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2995 static void GenTrailingZeros(X86_64Assembler* assembler,
2996                              CodeGeneratorX86_64* codegen,
2997                              HInvoke* invoke, bool is_long) {
2998   LocationSummary* locations = invoke->GetLocations();
2999   Location src = locations->InAt(0);
3000   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3001 
3002   int zero_value_result = is_long ? 64 : 32;
3003   if (invoke->InputAt(0)->IsConstant()) {
3004     // Evaluate this at compile time.
3005     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3006     if (value == 0) {
3007       value = zero_value_result;
3008     } else {
3009       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3010     }
3011     codegen->Load32BitValue(out, value);
3012     return;
3013   }
3014 
3015   // Handle the non-constant cases.
3016   if (src.IsRegister()) {
3017     if (is_long) {
3018       __ bsfq(out, src.AsRegister<CpuRegister>());
3019     } else {
3020       __ bsfl(out, src.AsRegister<CpuRegister>());
3021     }
3022   } else if (is_long) {
3023     DCHECK(src.IsDoubleStackSlot());
3024     __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3025   } else {
3026     DCHECK(src.IsStackSlot());
3027     __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3028   }
3029 
3030   // BSF sets ZF if the input was zero, and the output is undefined.
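  // For a nonzero input, BSF directly yields the number of trailing zeros, so only the
  // zero case needs a fixup below.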
3031   NearLabel done;
3032   __ j(kNotEqual, &done);
3033 
3034   // Fix the zero case with the expected result.
3035   __ movl(out, Immediate(zero_value_result));
3036 
3037   __ Bind(&done);
3038 }
3039 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3040 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3041   CreateTrailingZeroLocations(allocator_, invoke);
3042 }
3043 
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3044 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3045   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3046 }
3047 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3048 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3049   CreateTrailingZeroLocations(allocator_, invoke);
3050 }
3051 
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3052 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3053   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3054 }
3055 
VisitIntegerValueOf(HInvoke * invoke)3056 void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
3057   InvokeRuntimeCallingConvention calling_convention;
3058   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3059       invoke,
3060       codegen_,
3061       Location::RegisterLocation(RAX),
3062       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3063 }
3064 
VisitIntegerValueOf(HInvoke * invoke)3065 void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
3066   IntrinsicVisitor::IntegerValueOfInfo info =
3067       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3068   LocationSummary* locations = invoke->GetLocations();
3069   X86_64Assembler* assembler = GetAssembler();
3070 
3071   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3072   InvokeRuntimeCallingConvention calling_convention;
3073   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3074   auto allocate_instance = [&]() {
3075     codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3076     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3077     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3078   };
3079   if (invoke->InputAt(0)->IsIntConstant()) {
3080     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
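    // The unsigned comparison below checks `info.low <= value < info.low + info.length`
    // in a single test.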
3081     if (static_cast<uint32_t>(value - info.low) < info.length) {
3082       // Just embed the j.l.Integer in the code.
3083       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3084       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3085     } else {
3086       DCHECK(locations->CanCall());
3087       // Allocate and initialize a new j.l.Integer.
3088       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3089       // JIT object table.
3090       allocate_instance();
3091       __ movl(Address(out, info.value_offset), Immediate(value));
3092     }
3093   } else {
3094     DCHECK(locations->CanCall());
3095     CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3096     // Check bounds of our cache.
3097     __ leal(out, Address(in, -info.low));
3098     __ cmpl(out, Immediate(info.length));
3099     NearLabel allocate, done;
3100     __ j(kAboveEqual, &allocate);
3101     // If the value is within the bounds, load the j.l.Integer directly from the array.
3102     DCHECK_NE(out.AsRegister(), argument.AsRegister());
3103     codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3104     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3105                   "Check heap reference size.");
3106     __ movl(out, Address(argument, out, TIMES_4, 0));
3107     __ MaybeUnpoisonHeapReference(out);
3108     __ jmp(&done);
3109     __ Bind(&allocate);
3110     // Otherwise allocate and initialize a new j.l.Integer.
3111     allocate_instance();
3112     __ movl(Address(out, info.value_offset), in);
3113     __ Bind(&done);
3114   }
3115 }
3116 
VisitReferenceGetReferent(HInvoke * invoke)3117 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3118   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3119 }
3120 
VisitReferenceGetReferent(HInvoke * invoke)3121 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3122   X86_64Assembler* assembler = GetAssembler();
3123   LocationSummary* locations = invoke->GetLocations();
3124 
3125   Location obj = locations->InAt(0);
3126   Location out = locations->Out();
3127 
3128   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3129   codegen_->AddSlowPath(slow_path);
3130 
3131   if (gUseReadBarrier) {
3132     // Check self->GetWeakRefAccessEnabled().
3133     ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3134     __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3135                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3136     __ j(kNotEqual, slow_path->GetEntryLabel());
3137   }
3138 
3139   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3140   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3141 
3142   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3143   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3144   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3145   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3146             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
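  // The two byte-sized flags are adjacent (checked above), so a single 16-bit compare
  // against zero tests both at once.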
3147   __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3148           Immediate(0));
3149   __ j(kNotEqual, slow_path->GetEntryLabel());
3150 
3151   // Load the value from the field.
3152   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3153   if (gUseReadBarrier && kUseBakerReadBarrier) {
3154     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3155                                                     out,
3156                                                     obj.AsRegister<CpuRegister>(),
3157                                                     referent_offset,
3158                                                     /*needs_null_check=*/ true);
3159     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3160     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3161   } else {
3162     __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3163     codegen_->MaybeRecordImplicitNullCheck(invoke);
3164     // Note that the fence is a no-op, thanks to the x86-64 memory model.
3165     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3166     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3167   }
3168   __ Bind(slow_path->GetExitLabel());
3169 }
3170 
VisitReferenceRefersTo(HInvoke * invoke)3171 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3172   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3173 }
3174 
VisitReferenceRefersTo(HInvoke * invoke)3175 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3176   X86_64Assembler* assembler = GetAssembler();
3177   LocationSummary* locations = invoke->GetLocations();
3178 
3179   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3180   CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3181   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3182 
3183   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3184   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3185 
3186   __ movl(out, Address(obj, referent_offset));
3187   codegen_->MaybeRecordImplicitNullCheck(invoke);
3188   __ MaybeUnpoisonHeapReference(out);
3189   // Note that the fence is a no-op, thanks to the x86-64 memory model.
3190   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3191 
3192   __ cmpl(out, other);
3193 
3194   if (gUseReadBarrier) {
3195     DCHECK(kUseBakerReadBarrier);
3196 
3197     NearLabel calculate_result;
3198     __ j(kEqual, &calculate_result);  // ZF set if taken.
3199 
3200     // Check if the loaded reference is null in a way that leaves ZF clear for null.
3201     __ cmpl(out, Immediate(1));
3202     __ j(kBelow, &calculate_result);  // ZF clear if taken.
3203 
3204     // For correct memory visibility, we need a barrier before loading the lock word,
3205     // but the barrier already emitted above for the volatile load is sufficient.
3206 
3207     // Load the lockword and check if it is a forwarding address.
3208     static_assert(LockWord::kStateShift == 30u);
3209     static_assert(LockWord::kStateForwardingAddress == 3u);
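    // With the state in the top two bits, a lock word in forwarding-address state (3) is
    // >= 0xc0000000 when compared as unsigned; the kBelow branch below relies on this.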
3210     __ movl(out, Address(out, monitor_offset));
3211     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3212     __ j(kBelow, &calculate_result);   // ZF clear if taken.
3213 
3214     // Extract the forwarding address and compare with `other`.
3215     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3216     __ cmpl(out, other);
3217 
3218     __ Bind(&calculate_result);
3219   }
3220 
3221   // Convert ZF into the Boolean result.
3222   __ setcc(kEqual, out);
3223   __ movzxb(out, out);
3224 }
3225 
VisitThreadInterrupted(HInvoke * invoke)3226 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3227   LocationSummary* locations =
3228       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3229   locations->SetOut(Location::RequiresRegister());
3230 }
3231 
VisitThreadInterrupted(HInvoke * invoke)3232 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3233   X86_64Assembler* assembler = GetAssembler();
3234   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3235   Address address = Address::Absolute(
3236       Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3237   NearLabel done;
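  // Load the thread-local `interrupted` flag via GS; if it is set, clear it and issue a
  // memory fence (Thread.interrupted() clears the flag as it reads it).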
3238   __ gs()->movl(out, address);
3239   __ testl(out, out);
3240   __ j(kEqual, &done);
3241   __ gs()->movl(address, Immediate(0));
3242   codegen_->MemoryFence();
3243   __ Bind(&done);
3244 }
3245 
VisitReachabilityFence(HInvoke * invoke)3246 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3247   LocationSummary* locations =
3248       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3249   locations->SetInAt(0, Location::Any());
3250 }
3251 
VisitReachabilityFence(HInvoke * invoke ATTRIBUTE_UNUSED)3252 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3253 
CreateDivideUnsignedLocations(HInvoke * invoke,ArenaAllocator * allocator)3254 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3255   LocationSummary* locations =
3256       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3257   locations->SetInAt(0, Location::RegisterLocation(RAX));
3258   locations->SetInAt(1, Location::RequiresRegister());
3259   locations->SetOut(Location::SameAsFirstInput());
3260   // x86 uses edx:eax (rdx:rax for 64-bit operands) as the dividend.
3261   locations->AddTemp(Location::RegisterLocation(RDX));
3262 }
3263 
GenerateDivideUnsigned(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type data_type)3264 static void GenerateDivideUnsigned(HInvoke* invoke,
3265                                    CodeGeneratorX86_64* codegen,
3266                                    DataType::Type data_type) {
3267   LocationSummary* locations = invoke->GetLocations();
3268   Location out = locations->Out();
3269   Location first = locations->InAt(0);
3270   Location second = locations->InAt(1);
3271   CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3272   CpuRegister second_reg = second.AsRegister<CpuRegister>();
3273 
3274   DCHECK_EQ(RAX, first.AsRegister<Register>());
3275   DCHECK_EQ(RAX, out.AsRegister<Register>());
3276   DCHECK_EQ(RDX, rdx.AsRegister());
3277 
3278   // Check whether the divisor is zero and bail out to the slow path to handle that case.
3279   auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3280   codegen->AddSlowPath(slow_path);
3281 
3282   X86_64Assembler* assembler = codegen->GetAssembler();
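  // Unsigned DIV divides the double-width value in (R/E)DX:(R/E)AX by the operand, so the
  // high half is zeroed first; the quotient ends up in (R/E)AX and the remainder in (R/E)DX.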
3283   if (data_type == DataType::Type::kInt32) {
3284     __ testl(second_reg, second_reg);
3285     __ j(kEqual, slow_path->GetEntryLabel());
3286     __ xorl(rdx, rdx);
3287     __ divl(second_reg);
3288   } else {
3289     DCHECK(data_type == DataType::Type::kInt64);
3290     __ testq(second_reg, second_reg);
3291     __ j(kEqual, slow_path->GetEntryLabel());
3292     __ xorq(rdx, rdx);
3293     __ divq(second_reg);
3294   }
3295   __ Bind(slow_path->GetExitLabel());
3296 }
3297 
VisitIntegerDivideUnsigned(HInvoke * invoke)3298 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3299   CreateDivideUnsignedLocations(invoke, allocator_);
3300 }
3301 
VisitIntegerDivideUnsigned(HInvoke * invoke)3302 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3303   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3304 }
3305 
VisitLongDivideUnsigned(HInvoke * invoke)3306 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3307   CreateDivideUnsignedLocations(invoke, allocator_);
3308 }
3309 
VisitLongDivideUnsigned(HInvoke * invoke)3310 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3311   GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3312 }
3313 
VisitMathMultiplyHigh(HInvoke * invoke)3314 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3315   LocationSummary* locations =
3316       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3317   locations->SetInAt(0, Location::RegisterLocation(RAX));
3318   locations->SetInAt(1, Location::RequiresRegister());
3319   locations->SetOut(Location::RegisterLocation(RDX));
3320   locations->AddTemp(Location::RegisterLocation(RAX));
3321 }
3322 
VisitMathMultiplyHigh(HInvoke * invoke)3323 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3324   X86_64Assembler* assembler = GetAssembler();
3325   LocationSummary* locations = invoke->GetLocations();
3326 
3327   CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3328 
3329   DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3330   DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3331 
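  // One-operand IMUL multiplies RAX by `y`, leaving the 128-bit product in RDX:RAX;
  // the high 64 bits in RDX are the multiplyHigh result.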
3332   __ imulq(y);
3333 }
3334 
3335 enum class GetAndUpdateOp {
3336   kSet,
3337   kAdd,
3338   kBitwiseAnd,
3339   kBitwiseOr,
3340   kBitwiseXor
3341 };
3342 
3343 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3344  public:
VarHandleSlowPathX86_64(HInvoke * invoke)3345   explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3346       : IntrinsicSlowPathX86_64(invoke) {
3347   }
3348 
SetVolatile(bool is_volatile)3349   void SetVolatile(bool is_volatile) {
3350     is_volatile_ = is_volatile;
3351   }
3352 
SetAtomic(bool is_atomic)3353   void SetAtomic(bool is_atomic) {
3354     is_atomic_ = is_atomic;
3355   }
3356 
SetNeedAnyStoreBarrier(bool need_any_store_barrier)3357   void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3358     need_any_store_barrier_ = need_any_store_barrier;
3359   }
3360 
SetNeedAnyAnyBarrier(bool need_any_any_barrier)3361   void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3362     need_any_any_barrier_ = need_any_any_barrier;
3363   }
3364 
SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op)3365   void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3366     get_and_update_op_ = get_and_update_op;
3367   }
3368 
GetByteArrayViewCheckLabel()3369   Label* GetByteArrayViewCheckLabel() {
3370     return &byte_array_view_check_label_;
3371   }
3372 
GetNativeByteOrderLabel()3373   Label* GetNativeByteOrderLabel() {
3374     return &native_byte_order_label_;
3375   }
3376 
EmitNativeCode(CodeGenerator * codegen)3377   void EmitNativeCode(CodeGenerator* codegen) override {
3378     if (GetByteArrayViewCheckLabel()->IsLinked()) {
3379       EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3380     }
3381     IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3382   }
3383 
3384  private:
GetInvoke() const3385   HInvoke* GetInvoke() const {
3386     return GetInstruction()->AsInvoke();
3387   }
3388 
GetAccessModeTemplate() const3389   mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3390     return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3391   }
3392 
3393   void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3394 
3395   Label byte_array_view_check_label_;
3396   Label native_byte_order_label_;
3397 
3398   // Arguments forwarded to specific methods.
3399   bool is_volatile_;
3400   bool is_atomic_;
3401   bool need_any_store_barrier_;
3402   bool need_any_any_barrier_;
3403   GetAndUpdateOp get_and_update_op_;
3404 };
3405 
GenerateMathFma(HInvoke * invoke,CodeGeneratorX86_64 * codegen)3406 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3407   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3408   X86_64Assembler* assembler = codegen->GetAssembler();
3409   LocationSummary* locations = invoke->GetLocations();
3410   DCHECK(locations->InAt(0).Equals(locations->Out()));
3411   XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3412   XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3413   XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
3414   if (invoke->GetType() == DataType::Type::kFloat32) {
3415     __ vfmadd213ss(left, right, accumulator);
3416   } else {
3417     DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3418     __ vfmadd213sd(left, right, accumulator);
3419   }
3420 }
3421 
VisitMathFmaDouble(HInvoke * invoke)3422 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3423   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3424   GenerateMathFma(invoke, codegen_);
3425 }
3426 
VisitMathFmaDouble(HInvoke * invoke)3427 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3428   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3429     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3430   }
3431 }
3432 
VisitMathFmaFloat(HInvoke * invoke)3433 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3434   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3435   GenerateMathFma(invoke, codegen_);
3436 }
3437 
3438 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3439   if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3440     CreateFPFPFPToFPCallLocations(allocator_, invoke);
3441   }
3442 }
3443 
3444 // Generate subtype check without read barriers.
3445 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3446                                                     VarHandleSlowPathX86_64* slow_path,
3447                                                     CpuRegister object,
3448                                                     CpuRegister temp,
3449                                                     Address type_address,
3450                                                     bool object_can_be_null = true) {
3451   X86_64Assembler* assembler = codegen->GetAssembler();
3452 
3453   const MemberOffset class_offset = mirror::Object::ClassOffset();
3454   const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3455 
3456   NearLabel check_type_compatibility, type_matched;
3457 
3458   // If the object is null, there is no need to check the type
3459   if (object_can_be_null) {
3460     __ testl(object, object);
3461     __ j(kZero, &type_matched);
3462   }
3463 
3464   // Do not unpoison for in-memory comparison.
3465   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3466   __ movl(temp, Address(object, class_offset));
3467   __ Bind(&check_type_compatibility);
3468   __ cmpl(temp, type_address);
3469   __ j(kEqual, &type_matched);
3470   // Load the super class.
3471   __ MaybeUnpoisonHeapReference(temp);
3472   __ movl(temp, Address(temp, super_class_offset));
3473   // If the super class is null, we reached the root of the hierarchy without a match.
3474   // We let the slow path handle uncovered cases (e.g. interfaces).
3475   __ testl(temp, temp);
3476   __ j(kEqual, slow_path->GetEntryLabel());
3477   __ jmp(&check_type_compatibility);
3478   __ Bind(&type_matched);
3479 }
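
// A rough C++ analogue of the loop emitted above (illustrative only; accessor names are
// approximate). Because no read barrier is used, the comparison can produce false
// negatives, which are resolved by the slow path:
//
//   if (object_can_be_null && object == nullptr) return;   // null matches trivially
//   mirror::Class* klass = object->GetClass();             // compared in-memory, still poisoned
//   while (klass != expected_type) {
//     klass = klass->GetSuperClass();                      // unpoison, then load the super class
//     if (klass == nullptr) GoToSlowPath();                // root reached, or an interface type
//   }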
3480 
3481 // Check access mode and the primitive type from VarHandle.varType.
3482 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3483 // check without read barrier, so it can have false negatives which we handle in the slow path.
3484 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3485                                                         CodeGeneratorX86_64* codegen,
3486                                                         VarHandleSlowPathX86_64* slow_path,
3487                                                         DataType::Type type) {
3488   X86_64Assembler* assembler = codegen->GetAssembler();
3489 
3490   LocationSummary* locations = invoke->GetLocations();
3491   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3492   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3493 
3494   mirror::VarHandle::AccessMode access_mode =
3495       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3496   Primitive::Type primitive_type = DataTypeToPrimitive(type);
3497 
3498   const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3499   const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3500   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3501 
3502   // Check that the operation is permitted.
3503   __ testl(Address(varhandle, access_mode_bit_mask_offset),
3504            Immediate(1u << static_cast<uint32_t>(access_mode)));
3505   __ j(kZero, slow_path->GetEntryLabel());
3506 
3507   // For primitive types, we do not need a read barrier when loading the varType reference, as
3508   // it is only used for loading a constant field through that reference. For reference types,
3509   // we deliberately avoid the read barrier, letting the slow path handle the false negatives.
3510   __ movl(temp, Address(varhandle, var_type_offset));
3511   __ MaybeUnpoisonHeapReference(temp);
3512 
3513   // Check the varType.primitiveType field against the type we're trying to retrieve.
3514   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3515   __ j(kNotEqual, slow_path->GetEntryLabel());
3516 
3517   if (type == DataType::Type::kReference) {
3518     // Check reference arguments against the varType.
3519     // False negatives due to varType being an interface or array type
3520     // or due to the missing read barrier are handled by the slow path.
3521     size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3522     uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3523     uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3524     for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3525       HInstruction* arg = invoke->InputAt(arg_index);
3526       DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3527       if (!arg->IsNullConstant()) {
3528         CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3529         Address type_addr(varhandle, var_type_offset);
3530         GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3531       }
3532     }
3533   }
3534 }
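
// In pseudo-C++, the fast-path checks above amount to the following (field names are
// illustrative stand-ins for the offsets used by the emitted code):
//
//   if ((varhandle->access_modes_bit_mask_ & (1u << access_mode)) == 0u) GoToSlowPath();
//   if (varhandle->var_type_->primitive_type_ != primitive_type) GoToSlowPath();
//   // Reference arguments additionally go through the subtype check above, without a
//   // read barrier, so false negatives end up in the slow path as well.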
3535 
3536 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3537                                               CodeGeneratorX86_64* codegen,
3538                                               VarHandleSlowPathX86_64* slow_path) {
3539   X86_64Assembler* assembler = codegen->GetAssembler();
3540 
3541   LocationSummary* locations = invoke->GetLocations();
3542   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3543 
3544   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3545 
3546   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3547   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3548   __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3549   __ j(kNotEqual, slow_path->GetEntryLabel());
3550 }
3551 
3552 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3553                                                  CodeGeneratorX86_64* codegen,
3554                                                  VarHandleSlowPathX86_64* slow_path) {
3555   VarHandleOptimizations optimizations(invoke);
3556   X86_64Assembler* assembler = codegen->GetAssembler();
3557 
3558   LocationSummary* locations = invoke->GetLocations();
3559   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3560   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3561   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3562 
3563   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3564   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3565 
3566   // Null-check the object.
3567   if (!optimizations.GetSkipObjectNullCheck()) {
3568     __ testl(object, object);
3569     __ j(kZero, slow_path->GetEntryLabel());
3570   }
3571 
3572   if (!optimizations.GetUseKnownBootImageVarHandle()) {
3573     // Check that the VarHandle references an instance field by checking that
3574     // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3575     // type compatibility check with the source object's type, which will fail for null.
3576     __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3577     __ j(kNotEqual, slow_path->GetEntryLabel());
3578 
3579     // Check that the object has the correct type.
3580     // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3581     GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3582                                             slow_path,
3583                                             object,
3584                                             temp,
3585                                             Address(varhandle, coordinate_type0_offset),
3586                                             /*object_can_be_null=*/ false);
3587   }
3588 }
3589 
3590 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3591                                          CodeGeneratorX86_64* codegen,
3592                                          VarHandleSlowPathX86_64* slow_path) {
3593   VarHandleOptimizations optimizations(invoke);
3594   X86_64Assembler* assembler = codegen->GetAssembler();
3595   LocationSummary* locations = invoke->GetLocations();
3596 
3597   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3598   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3599   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3600   DataType::Type value_type =
3601       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3602   Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3603 
3604   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3605   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3606   const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3607   const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3608   const MemberOffset class_offset = mirror::Object::ClassOffset();
3609   const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3610 
3611   // Null-check the object.
3612   if (!optimizations.GetSkipObjectNullCheck()) {
3613     __ testl(object, object);
3614     __ j(kZero, slow_path->GetEntryLabel());
3615   }
3616 
3617   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3618 
3619   // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3620   // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3621   // coordinateType0 shall not be null but we do not explicitly verify that.
3622   // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3623   __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3624   __ j(kEqual, slow_path->GetEntryLabel());
3625 
3626   // Check the object's class against coordinateType0.
3627   //
3628   // This is an exact check and we defer other cases to the runtime. This includes
3629   // conversion to array of superclass references, which is valid but subsequently
3630   // requires all update operations to check that the value can indeed be stored.
3631   // We do not want to perform such extra checks in the intrinsified code.
3632   //
3633   // We do this check without read barrier, so there can be false negatives which we
3634   // defer to the slow path. There shall be no false negatives for array classes in the
3635   // boot image (including Object[] and primitive arrays) because they are non-movable.
3636   __ movl(temp, Address(object, class_offset.Int32Value()));
3637   __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3638   __ j(kNotEqual, slow_path->GetEntryLabel());
3639 
3640   // Check that the coordinateType0 is an array type. We do not need a read barrier
3641   // for loading constant reference fields (or chains of them) for comparison with null,
3642   // nor for finally loading a constant primitive field (primitive type) below.
3643   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3644   __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3645   codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3646   __ testl(temp, temp);
3647   __ j(kZero, slow_path->GetEntryLabel());
3648 
3649   // Check that the array component type matches the primitive type.
3650   Label* slow_path_label;
3651   if (primitive_type == Primitive::kPrimNot) {
3652     slow_path_label = slow_path->GetEntryLabel();
3653   } else {
3654     // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3655     // we shall check for a byte array view in the slow path.
3656     // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3657     // so we cannot emit that if we're JITting without boot image.
3658     bool boot_image_available =
3659         codegen->GetCompilerOptions().IsBootImage() ||
3660         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3661     bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3662     slow_path_label =
3663         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3664   }
3665   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3666   __ j(kNotEqual, slow_path_label);
3667 
3668   // Check for array index out of bounds.
3669   __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3670   __ j(kAboveEqual, slow_path->GetEntryLabel());
3671 }
3672 
3673 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3674                                               CodeGeneratorX86_64* codegen,
3675                                               VarHandleSlowPathX86_64* slow_path) {
3676   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3677   if (expected_coordinates_count == 0u) {
3678     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3679   } else if (expected_coordinates_count == 1u) {
3680     GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3681   } else {
3682     DCHECK_EQ(expected_coordinates_count, 2u);
3683     GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3684   }
3685 }
3686 
3687 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
3688                                                         CodeGeneratorX86_64* codegen,
3689                                                         DataType::Type type) {
3690   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3691   VarHandleOptimizations optimizations(invoke);
3692   if (optimizations.GetUseKnownBootImageVarHandle()) {
3693     DCHECK_NE(expected_coordinates_count, 2u);
3694     if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3695       return nullptr;
3696     }
3697   }
3698 
3699   VarHandleSlowPathX86_64* slow_path =
3700       new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
3701   codegen->AddSlowPath(slow_path);
3702 
3703   if (!optimizations.GetUseKnownBootImageVarHandle()) {
3704     GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3705   }
3706   GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3707 
3708   return slow_path;
3709 }
3710 
3711 struct VarHandleTarget {
3712   Register object;  // The object holding the value to operate on.
3713   Register offset;  // The offset of the value to operate on.
3714 };
3715 
3716 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3717   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3718   LocationSummary* locations = invoke->GetLocations();
3719 
3720   VarHandleTarget target;
3721   // The temporary allocated for loading the offset.
3722   target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
3723   // The reference to the object that holds the value to operate on.
3724   target.object = (expected_coordinates_count == 0u)
3725       ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
3726       : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
3727   return target;
3728 }
3729 
3730 static void GenerateVarHandleTarget(HInvoke* invoke,
3731                                     const VarHandleTarget& target,
3732                                     CodeGeneratorX86_64* codegen) {
3733   LocationSummary* locations = invoke->GetLocations();
3734   X86_64Assembler* assembler = codegen->GetAssembler();
3735   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3736 
3737   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3738 
3739   if (expected_coordinates_count <= 1u) {
3740     if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) {
3741       ScopedObjectAccess soa(Thread::Current());
3742       ArtField* target_field = GetBootImageVarHandleField(invoke);
3743       if (expected_coordinates_count == 0u) {
3744         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3745         __ movl(CpuRegister(target.object),
3746                 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
3747         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3748           codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
3749         } else {
3750           codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
3751                                             declaring_class->GetDexTypeIndex());
3752         }
3753       }
3754       __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
3755     } else {
3756       // For static fields, we need to fill the `target.object` with the declaring class,
3757       // so we can use `target.object` as a temporary for the `ArtMethod*`. For instance fields,
3758       // we do not need the declaring class, so we can forget the `ArtMethod*` when
3759       // we load the `target.offset`, and therefore use `target.offset` to hold the `ArtMethod*`.
3760       CpuRegister method((expected_coordinates_count == 0) ? target.object : target.offset);
3761 
3762       const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3763       const MemberOffset offset_offset = ArtField::OffsetOffset();
3764 
3765       // Load the ArtField, the offset and, if needed, declaring class.
3766       __ movq(method, Address(varhandle, art_field_offset));
3767       __ movl(CpuRegister(target.offset), Address(method, offset_offset));
3768       if (expected_coordinates_count == 0u) {
3769         InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
3770         instr_codegen->GenerateGcRootFieldLoad(invoke,
3771                                                Location::RegisterLocation(target.object),
3772                                                Address(method, ArtField::DeclaringClassOffset()),
3773                                                /*fixup_label=*/ nullptr,
3774                                                gCompilerReadBarrierOption);
3775       }
3776     }
3777   } else {
3778     DCHECK_EQ(expected_coordinates_count, 2u);
3779 
3780     DataType::Type value_type =
3781         GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3782     ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
3783     MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3784     CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3785 
3786     // The effect of LEA is `target.offset = index * scale + data_offset`.
3787     __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
3788   }
3789 }
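
// After this point the accessed value lives at `target.object + target.offset`; the
// accessors below build the operand as `Address(object, offset, TIMES_1, 0)`. Summary of
// the three cases handled above:
//
//   static field:    object = declaring class,  offset = field offset from the ArtField
//   instance field:  object = the given object, offset = field offset from the ArtField
//   array element:   object = the given array,  offset = data_offset + index * scale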
3790 
3791 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3792   // The only supported read barrier implementation is the Baker-style read barriers.
3793   if (gUseReadBarrier && !kUseBakerReadBarrier) {
3794     return false;
3795   }
3796 
3797   VarHandleOptimizations optimizations(invoke);
3798   if (optimizations.GetDoNotIntrinsify()) {
3799     return false;
3800   }
3801 
3802   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3803   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
3804   return true;
3805 }
3806 
3807 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
3808   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3809   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3810   LocationSummary* locations = new (allocator) LocationSummary(
3811       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3812 
3813   locations->SetInAt(0, Location::RequiresRegister());
3814   // Require coordinates in registers. These are the object holding the value
3815   // to operate on (except for static fields) and index (for arrays and views).
3816   for (size_t i = 0; i != expected_coordinates_count; ++i) {
3817     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3818   }
3819 
3820   uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3821   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3822   for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3823     HInstruction* arg = invoke->InputAt(arg_index);
3824     if (DataType::IsFloatingPointType(arg->GetType())) {
3825       locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
3826     } else {
3827       locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
3828     }
3829   }
3830 
3831   // Add a temporary for offset.
3832   locations->AddTemp(Location::RequiresRegister());
3833 
3834   if (expected_coordinates_count == 0u) {
3835     // Add a temporary to hold the declaring class.
3836     locations->AddTemp(Location::RequiresRegister());
3837   }
3838 
3839   return locations;
3840 }
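
// Resulting location layout shared by the VarHandle intrinsics below (derived from the
// code above; indices are relative to the invoke's inputs and temporaries):
//
//   InAt(0)                  - the VarHandle object
//   InAt(1)..InAt(coords)    - coordinates: holder object and/or index, in registers
//   InAt(coords + 1)..       - value arguments, register/FP register or constant
//   GetTemp(0)               - field offset (or LEA result for arrays)
//   GetTemp(1)               - declaring class, for static fields only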
3841 
3842 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3843   if (!HasVarHandleIntrinsicImplementation(invoke)) {
3844     return;
3845   }
3846 
3847   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
3848   if (DataType::IsFloatingPointType(invoke->GetType())) {
3849     locations->SetOut(Location::RequiresFpuRegister());
3850   } else {
3851     locations->SetOut(Location::RequiresRegister());
3852   }
3853 }
3854 
3855 static void GenerateVarHandleGet(HInvoke* invoke,
3856                                  CodeGeneratorX86_64* codegen,
3857                                  bool byte_swap = false) {
3858   DataType::Type type = invoke->GetType();
3859   DCHECK_NE(type, DataType::Type::kVoid);
3860 
3861   LocationSummary* locations = invoke->GetLocations();
3862   X86_64Assembler* assembler = codegen->GetAssembler();
3863 
3864   VarHandleTarget target = GetVarHandleTarget(invoke);
3865   VarHandleSlowPathX86_64* slow_path = nullptr;
3866   if (!byte_swap) {
3867     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
3868     GenerateVarHandleTarget(invoke, target, codegen);
3869     if (slow_path != nullptr) {
3870       __ Bind(slow_path->GetNativeByteOrderLabel());
3871     }
3872   }
3873 
3874   // Load the value from the field
3875   Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
3876   Location out = locations->Out();
3877 
3878   if (type == DataType::Type::kReference) {
3879     if (gUseReadBarrier) {
3880       DCHECK(kUseBakerReadBarrier);
3881       codegen->GenerateReferenceLoadWithBakerReadBarrier(
3882           invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
3883     } else {
3884       __ movl(out.AsRegister<CpuRegister>(), src);
3885       __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
3886     }
3887     DCHECK(!byte_swap);
3888   } else {
3889     codegen->LoadFromMemoryNoReference(type, out, src);
3890     if (byte_swap) {
3891       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3892       codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
3893     }
3894   }
3895 
3896   if (slow_path != nullptr) {
3897     DCHECK(!byte_swap);
3898     __ Bind(slow_path->GetExitLabel());
3899   }
3900 }
3901 
3902 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
3903   CreateVarHandleGetLocations(invoke);
3904 }
3905 
3906 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
3907   GenerateVarHandleGet(invoke, codegen_);
3908 }
3909 
3910 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3911   CreateVarHandleGetLocations(invoke);
3912 }
3913 
3914 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3915   // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
3916   GenerateVarHandleGet(invoke, codegen_);
3917 }
3918 
3919 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3920   CreateVarHandleGetLocations(invoke);
3921 }
3922 
3923 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3924   // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
3925   GenerateVarHandleGet(invoke, codegen_);
3926 }
3927 
3928 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3929   CreateVarHandleGetLocations(invoke);
3930 }
3931 
3932 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3933   // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
3934   GenerateVarHandleGet(invoke, codegen_);
3935 }
3936 
3937 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3938   if (!HasVarHandleIntrinsicImplementation(invoke)) {
3939     return;
3940   }
3941 
3942   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
3943 
3944   // An extra temporary is used for the card in MarkGCCard and to move 64-bit constants to memory.
3945   locations->AddTemp(Location::RequiresRegister());
3946 }
3947 
3948 static void GenerateVarHandleSet(HInvoke* invoke,
3949                                  CodeGeneratorX86_64* codegen,
3950                                  bool is_volatile,
3951                                  bool is_atomic,
3952                                  bool byte_swap = false) {
3953   X86_64Assembler* assembler = codegen->GetAssembler();
3954 
3955   LocationSummary* locations = invoke->GetLocations();
3956   const uint32_t last_temp_index = locations->GetTempCount() - 1;
3957 
3958   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3959   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3960 
3961   VarHandleTarget target = GetVarHandleTarget(invoke);
3962   VarHandleSlowPathX86_64* slow_path = nullptr;
3963   if (!byte_swap) {
3964     slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
3965     GenerateVarHandleTarget(invoke, target, codegen);
3966     if (slow_path != nullptr) {
3967       slow_path->SetVolatile(is_volatile);
3968       slow_path->SetAtomic(is_atomic);
3969       __ Bind(slow_path->GetNativeByteOrderLabel());
3970     }
3971   }
3972 
3973   switch (invoke->GetIntrinsic()) {
3974     case Intrinsics::kVarHandleSetRelease:
3975       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3976       break;
3977     case Intrinsics::kVarHandleSetVolatile:
3978       // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
3979       break;
3980     default:
3981       // Other intrinsics don't need a barrier.
3982       break;
3983   }
3984 
3985   Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
3986 
3987   // Store the value to the field.
3988   codegen->GetInstructionCodegen()->HandleFieldSet(
3989       invoke,
3990       value_index,
3991       last_temp_index,
3992       value_type,
3993       dst,
3994       CpuRegister(target.object),
3995       is_volatile,
3996       is_atomic,
3997       /*value_can_be_null=*/true,
3998       byte_swap,
3999       // Value can be null, and this write barrier is not being relied on for other sets.
4000       WriteBarrierKind::kEmitWithNullCheck);
4001 
4002   // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4003 
4004   if (slow_path != nullptr) {
4005     DCHECK(!byte_swap);
4006     __ Bind(slow_path->GetExitLabel());
4007   }
4008 }
4009 
4010 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4011   CreateVarHandleSetLocations(invoke);
4012 }
4013 
4014 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4015   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4016 }
4017 
4018 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4019   CreateVarHandleSetLocations(invoke);
4020 }
4021 
4022 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4023   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4024 }
4025 
4026 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4027   CreateVarHandleSetLocations(invoke);
4028 }
4029 
4030 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4031   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4032 }
4033 
4034 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4035   CreateVarHandleSetLocations(invoke);
4036 }
4037 
4038 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4039   GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4040 }
4041 
4042 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4043   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4044     return;
4045   }
4046 
4047   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4048   uint32_t expected_value_index = number_of_arguments - 2;
4049   uint32_t new_value_index = number_of_arguments - 1;
4050   DataType::Type return_type = invoke->GetType();
4051   DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4052   DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4053 
4054   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4055 
4056   if (DataType::IsFloatingPointType(return_type)) {
4057     locations->SetOut(Location::RequiresFpuRegister());
4058   } else {
4059     // Take advantage of the fact that CMPXCHG writes result to RAX.
4060     locations->SetOut(Location::RegisterLocation(RAX));
4061   }
4062 
4063   if (DataType::IsFloatingPointType(expected_type)) {
4064     // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4065     locations->AddTemp(Location::RegisterLocation(RAX));
4066     // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4067     locations->AddTemp(Location::RequiresRegister());
4068   } else {
4069     // Ensure that expected value is in RAX, as required by CMPXCHG.
4070     locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4071     locations->SetInAt(new_value_index, Location::RequiresRegister());
4072     if (expected_type == DataType::Type::kReference) {
4073       // Need two temporaries for MarkGCCard.
4074       locations->AddTemp(Location::RequiresRegister());
4075       locations->AddTemp(Location::RequiresRegister());
4076       if (gUseReadBarrier) {
4077         // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4078         DCHECK(kUseBakerReadBarrier);
4079         locations->AddTemp(Location::RequiresRegister());
4080       }
4081     }
4082     // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4083     DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4084   }
4085 }
4086 
4087 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4088                                                      CodeGeneratorX86_64* codegen,
4089                                                      bool is_cmpxchg,
4090                                                      bool byte_swap = false) {
4091   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4092 
4093   X86_64Assembler* assembler = codegen->GetAssembler();
4094   LocationSummary* locations = invoke->GetLocations();
4095 
4096   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4097   uint32_t expected_value_index = number_of_arguments - 2;
4098   uint32_t new_value_index = number_of_arguments - 1;
4099   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4100 
4101   VarHandleSlowPathX86_64* slow_path = nullptr;
4102   VarHandleTarget target = GetVarHandleTarget(invoke);
4103   if (!byte_swap) {
4104     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4105     GenerateVarHandleTarget(invoke, target, codegen);
4106     if (slow_path != nullptr) {
4107       __ Bind(slow_path->GetNativeByteOrderLabel());
4108     }
4109   }
4110 
4111   uint32_t temp_count = locations->GetTempCount();
4112   GenCompareAndSetOrExchange(codegen,
4113                              invoke,
4114                              type,
4115                              CpuRegister(target.object),
4116                              CpuRegister(target.offset),
4117                              /*temp1_index=*/ temp_count - 1,
4118                              /*temp2_index=*/ temp_count - 2,
4119                              /*temp3_index=*/ temp_count - 3,
4120                              locations->InAt(new_value_index),
4121                              locations->InAt(expected_value_index),
4122                              locations->Out(),
4123                              is_cmpxchg,
4124                              byte_swap);
4125 
4126   // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4127   // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4128 
4129   if (slow_path != nullptr) {
4130     DCHECK(!byte_swap);
4131     __ Bind(slow_path->GetExitLabel());
4132   }
4133 }
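
// For reference, LOCK CMPXCHG atomically performs the following, which is why the strong
// and "weak" VarHandle CAS flavors can share this code path:
//
//   if (*field == RAX) { *field = new_value; ZF = 1; }   // success
//   else               { RAX = *field;       ZF = 0; }   // failure, old value left in RAX
//
// compareAndSet-style intrinsics turn ZF into the boolean result, while
// compareAndExchange-style intrinsics (is_cmpxchg) return the value left in RAX.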
4134 
4135 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4136   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4137 }
4138 
4139 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4140   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4141 }
4142 
4143 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4144   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4145 }
4146 
4147 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4148   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4149 }
4150 
4151 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4152   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4153 }
4154 
4155 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4156   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4157 }
4158 
4159 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4160   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4161 }
4162 
4163 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4164   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4165 }
4166 
4167 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4168   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4169 }
4170 
4171 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4172   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4173 }
4174 
4175 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4176   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4177 }
4178 
4179 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4180   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4181 }
4182 
4183 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4184   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4185 }
4186 
4187 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4188   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4189 }
4190 
4191 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4192   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4193 }
4194 
4195 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4196   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4197 }
4198 
4199 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
4200   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4201     return;
4202   }
4203 
4204   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4205   uint32_t new_value_index = number_of_arguments - 1;
4206   DataType::Type type = invoke->GetType();
4207   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4208 
4209   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4210 
4211   if (DataType::IsFloatingPointType(type)) {
4212     locations->SetOut(Location::RequiresFpuRegister());
4213     // A temporary is needed to load the new floating-point value into a register for XCHG.
4214     locations->AddTemp(Location::RequiresRegister());
4215   } else {
4216     // Use the same register for both the new value and output to take advantage of XCHG.
4217     // It doesn't have to be RAX, but we need to pick a specific register to make sure it's the same.
4218     locations->SetOut(Location::RegisterLocation(RAX));
4219     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4220     if (type == DataType::Type::kReference) {
4221       // Need two temporaries for MarkGCCard.
4222       locations->AddTemp(Location::RequiresRegister());
4223       locations->AddTemp(Location::RequiresRegister());
4224       if (gUseReadBarrier) {
4225         // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4226         DCHECK(kUseBakerReadBarrier);
4227         locations->AddTemp(Location::RequiresRegister());
4228       }
4229     }
4230   }
4231 }
4232 
4233 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4234                                        CodeGeneratorX86_64* codegen,
4235                                        Location value,
4236                                        DataType::Type type,
4237                                        Address field_addr,
4238                                        CpuRegister ref,
4239                                        bool byte_swap) {
4240   X86_64Assembler* assembler = codegen->GetAssembler();
4241   LocationSummary* locations = invoke->GetLocations();
4242   Location out = locations->Out();
4243   uint32_t temp_count = locations->GetTempCount();
4244 
4245   if (DataType::IsFloatingPointType(type)) {
4246     // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4247     // exchange it with the field, and move the old value into the output FP register.
4248     Location temp = locations->GetTemp(temp_count - 1);
4249     codegen->Move(temp, value);
4250     bool is64bit = (type == DataType::Type::kFloat64);
4251     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4252     if (byte_swap) {
4253       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4254     }
4255     if (is64bit) {
4256       __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4257     } else {
4258       __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4259     }
4260     if (byte_swap) {
4261       codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4262     }
4263     __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4264   } else if (type == DataType::Type::kReference) {
4265     // `getAndSet` for references: load reference and atomically exchange it with the field.
4266     // The output register is the same as the one holding the new value, so no need to move the result.
4267     DCHECK(!byte_swap);
4268 
4269     CpuRegister temp1 = locations->GetTemp(temp_count - 1).AsRegister<CpuRegister>();
4270     CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
4271     CpuRegister valreg = value.AsRegister<CpuRegister>();
4272 
4273     if (gUseReadBarrier && kUseBakerReadBarrier) {
4274       codegen->GenerateReferenceLoadWithBakerReadBarrier(
4275           invoke,
4276           locations->GetTemp(temp_count - 3),
4277           ref,
4278           field_addr,
4279           /*needs_null_check=*/ false,
4280           /*always_update_field=*/ true,
4281           &temp1,
4282           &temp2);
4283     }
4284     codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false);
4285 
4286     DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4287     if (kPoisonHeapReferences) {
4288       // Use a temp to avoid poisoning the base of the field address, which might happen if `valreg` is
4289       // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4290       __ movl(temp1, valreg);
4291       __ PoisonHeapReference(temp1);
4292       __ xchgl(temp1, field_addr);
4293       __ UnpoisonHeapReference(temp1);
4294       __ movl(valreg, temp1);
4295     } else {
4296       __ xchgl(valreg, field_addr);
4297     }
4298   } else {
4299     // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4300     // register is the same as the one holding the new value. Sign- or zero-extend as needed.
4301     if (byte_swap) {
4302       codegen->GetInstructionCodegen()->Bswap(value, type);
4303     }
4304     CpuRegister valreg = value.AsRegister<CpuRegister>();
4305     DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4306     switch (type) {
4307       case DataType::Type::kBool:
4308       case DataType::Type::kUint8:
4309         __ xchgb(valreg, field_addr);
4310         __ movzxb(valreg, valreg);
4311         break;
4312       case DataType::Type::kInt8:
4313         __ xchgb(valreg, field_addr);
4314         __ movsxb(valreg, valreg);
4315         break;
4316       case DataType::Type::kUint16:
4317         __ xchgw(valreg, field_addr);
4318         __ movzxw(valreg, valreg);
4319         break;
4320       case DataType::Type::kInt16:
4321         __ xchgw(valreg, field_addr);
4322         __ movsxw(valreg, valreg);
4323         break;
4324       case DataType::Type::kInt32:
4325       case DataType::Type::kUint32:
4326         __ xchgl(valreg, field_addr);
4327         break;
4328       case DataType::Type::kInt64:
4329       case DataType::Type::kUint64:
4330         __ xchgq(valreg, field_addr);
4331         break;
4332       default:
4333         DCHECK(false) << "unexpected type in getAndSet intrinsic";
4334         UNREACHABLE();
4335     }
4336     if (byte_swap) {
4337       codegen->GetInstructionCodegen()->Bswap(value, type);
4338     }
4339   }
4340 }
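
// In all three branches above the exchange itself is a single XCHG with an implicit LOCK
// prefix, so no retry loop is needed. Roughly, for the integral case:
//
//   old = *field; *field = new_value;    // performed atomically by xchg
//   result = sign_or_zero_extend(old);   // re-extend, since xchg returns the raw bytes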
4341 
4342 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
4343   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4344     return;
4345   }
4346 
4347   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4348   uint32_t new_value_index = number_of_arguments - 1;
4349   DataType::Type type = invoke->GetType();
4350   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4351 
4352   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4353 
4354   DCHECK_NE(DataType::Type::kReference, type);
4355   DCHECK(!DataType::IsFloatingPointType(type));
4356 
4357   // A temporary to compute the bitwise operation on the old and the new values.
4358   locations->AddTemp(Location::RequiresRegister());
4359   // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
4360   // instructions that accept 64-bit immediate on x86_64).
4361   locations->SetInAt(new_value_index, DataType::Is64BitType(type)
4362       ? Location::RequiresRegister()
4363       : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4364   // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4365   locations->SetOut(Location::RegisterLocation(RAX));
4366 }
4367 
4368 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4369                                       CodeGeneratorX86_64* codegen,
4370                                       Location value,
4371                                       DataType::Type type,
4372                                       Address field_addr,
4373                                       GetAndUpdateOp get_and_update_op,
4374                                       bool byte_swap) {
4375   X86_64Assembler* assembler = codegen->GetAssembler();
4376   LocationSummary* locations = invoke->GetLocations();
4377   Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
4378   Location rax_loc = locations->Out();
4379   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4380   CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
4381   DCHECK_EQ(rax.AsRegister(), RAX);
4382   bool is64Bit = DataType::Is64BitType(type);
4383 
4384   NearLabel retry;
4385   __ Bind(&retry);
4386 
4387   // Load field value into RAX and copy it into a temporary register for the operation.
4388   codegen->LoadFromMemoryNoReference(type, Location::RegisterLocation(RAX), field_addr);
4389   codegen->Move(temp_loc, rax_loc);
4390   if (byte_swap) {
4391     // Byte swap the temporary, since we need to perform operation in native endianness.
4392     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4393   }
4394 
4395   DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4396   int32_t const_value = value.IsConstant()
4397       ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4398       : 0;
4399 
4400   // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4401   switch (get_and_update_op) {
4402     case GetAndUpdateOp::kAdd:
4403       DCHECK(byte_swap);  // The non-byte-swapping path should use a faster XADD instruction.
4404       if (is64Bit) {
4405         __ addq(temp, value.AsRegister<CpuRegister>());
4406       } else if (value.IsConstant()) {
4407         __ addl(temp, Immediate(const_value));
4408       } else {
4409         __ addl(temp, value.AsRegister<CpuRegister>());
4410       }
4411       break;
4412     case GetAndUpdateOp::kBitwiseAnd:
4413       if (is64Bit) {
4414         __ andq(temp, value.AsRegister<CpuRegister>());
4415       } else if (value.IsConstant()) {
4416         __ andl(temp, Immediate(const_value));
4417       } else {
4418         __ andl(temp, value.AsRegister<CpuRegister>());
4419       }
4420       break;
4421     case GetAndUpdateOp::kBitwiseOr:
4422       if (is64Bit) {
4423         __ orq(temp, value.AsRegister<CpuRegister>());
4424       } else if (value.IsConstant()) {
4425         __ orl(temp, Immediate(const_value));
4426       } else {
4427         __ orl(temp, value.AsRegister<CpuRegister>());
4428       }
4429       break;
4430     case GetAndUpdateOp::kBitwiseXor:
4431       if (is64Bit) {
4432         __ xorq(temp, value.AsRegister<CpuRegister>());
4433       } else if (value.IsConstant()) {
4434         __ xorl(temp, Immediate(const_value));
4435       } else {
4436         __ xorl(temp, value.AsRegister<CpuRegister>());
4437       }
4438       break;
4439     default:
4440       DCHECK(false) << "unexpected operation";
4441       UNREACHABLE();
4442   }
4443 
4444   if (byte_swap) {
4445     // RAX still contains the original value, but we need to byte swap the temporary back.
4446     codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4447   }
4448 
4449   switch (type) {
4450     case DataType::Type::kBool:
4451     case DataType::Type::kUint8:
4452     case DataType::Type::kInt8:
4453       __ LockCmpxchgb(field_addr, temp);
4454       break;
4455     case DataType::Type::kUint16:
4456     case DataType::Type::kInt16:
4457       __ LockCmpxchgw(field_addr, temp);
4458       break;
4459     case DataType::Type::kInt32:
4460     case DataType::Type::kUint32:
4461       __ LockCmpxchgl(field_addr, temp);
4462       break;
4463     case DataType::Type::kInt64:
4464     case DataType::Type::kUint64:
4465       __ LockCmpxchgq(field_addr, temp);
4466       break;
4467     default:
4468       DCHECK(false) << "unexpected type in getAndBitwiseOp intrinsic";
4469       UNREACHABLE();
4470   }
4471 
4472   __ j(kNotZero, &retry);
4473 
4474   // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4475   // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4476   if (byte_swap) {
4477     codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4478   }
4479 }
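
// The code above is the classic load / modify / LOCK CMPXCHG retry loop. A rough
// std::atomic analogue of, e.g., getAndBitwiseOr on an int field (byte swapping omitted;
// illustrative only):
//
//   int32_t GetAndBitwiseOr(std::atomic<int32_t>* field, int32_t arg) {
//     int32_t old_value = field->load();
//     while (!field->compare_exchange_weak(old_value, old_value | arg)) {
//       // `old_value` was refreshed with the current field value; retry with it.
//     }
//     return old_value;
//   }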
4480 
4481 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4482   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4483     return;
4484   }
4485 
4486   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4487   uint32_t new_value_index = number_of_arguments - 1;
4488   DataType::Type type = invoke->GetType();
4489   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4490 
4491   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4492 
4493   if (DataType::IsFloatingPointType(type)) {
4494     locations->SetOut(Location::RequiresFpuRegister());
4495     // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
4496     locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
4497     // CMPXCHG clobbers RAX.
4498     locations->AddTemp(Location::RegisterLocation(RAX));
4499     // An FP temporary to load the old value from the field and perform FP addition.
4500     locations->AddTemp(Location::RequiresFpuRegister());
4501     // A temporary to hold the new value for CMPXCHG.
4502     locations->AddTemp(Location::RequiresRegister());
4503   } else {
4504     DCHECK_NE(type, DataType::Type::kReference);
4505     // Use the same register for both the new value and output to take advantage of XADD.
4506     // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
4507     // back to GenerateVarHandleGetAndOp that expects out in RAX.
4508     locations->SetOut(Location::RegisterLocation(RAX));
4509     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4510     if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
4511       // For byte array views with non-native endianness we need extra BSWAP operations, so we
4512       // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
4513       // case we need two temporary registers: one to hold the value instead of RAX (which may get
4514       // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
4515       // cannot distinguish this case from arrays or native-endian byte array views.
4516       locations->AddTemp(Location::RequiresRegister());
4517       locations->AddTemp(Location::RequiresRegister());
4518     }
4519   }
4520 }
4521 
4522 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
4523                                        CodeGeneratorX86_64* codegen,
4524                                        Location value,
4525                                        DataType::Type type,
4526                                        Address field_addr,
4527                                        bool byte_swap) {
4528   X86_64Assembler* assembler = codegen->GetAssembler();
4529   LocationSummary* locations = invoke->GetLocations();
4530   Location out = locations->Out();
4531   uint32_t temp_count = locations->GetTempCount();
4532 
4533   if (DataType::IsFloatingPointType(type)) {
4534     if (byte_swap) {
4535       // This code should never be executed: it is the case of a byte array view (since it requires
4536       // a byte swap), and varhandles for byte array views support numeric atomic update access mode
4537       // only for int and long, but not for floating-point types (see javadoc comments for
4538       // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART varhandle
4539       // implementation for byte array views treats floating-point types as numeric types in
4540       // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
4541       // fails the access mode check at runtime before reaching this point. The illegal instruction
4542       // UD2 ensures that if control flow gets here by mistake, we will notice.
4543       __ ud2();
4544     }
4545 
4546     // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register
4547     // and into RAX for CMPXCHG, add the new FP value to it, move the sum to a non-FP temporary
4548     // for CMPXCHG, and loop until CMPXCHG succeeds; then move the old value from RAX to the output FP register.
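    // A rough editorial sketch of the retry loop emitted below (pseudo-assembly, 32-bit float,
    // native byte order; the 64-bit and byte-swapped variants differ in widths and extra BSWAPs):
    //
    //   retry:
    //     movss   fptemp, [field]        ; load the old value
    //     movd    eax, fptemp            ; expected value for CMPXCHG
    //     addss   fptemp, new_value
    //     movd    temp, fptemp           ; desired value for CMPXCHG
    //     lock cmpxchg [field], temp     ; if [field] == eax then [field] = temp else eax = [field]
    //     jnz     retry
    //   movd    out, eax                 ; the old value is the result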
4549     bool is64bit = (type == DataType::Type::kFloat64);
4550     DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4551     XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
4552     Location rax_loc = Location::RegisterLocation(RAX);
4553     Location temp_loc = locations->GetTemp(temp_count - 1);
4554     CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4555 
4556     NearLabel retry;
4557     __ Bind(&retry);
4558 
4559     // Read the value from memory into an FP register and copy it into RAX.
4560     if (is64bit) {
4561       __ movsd(fptemp, field_addr);
4562     } else {
4563       __ movss(fptemp, field_addr);
4564     }
4565     __ movd(CpuRegister(RAX), fptemp, is64bit);
4566     // If necessary, byte swap RAX and copy the swapped value back into the FP register.
4567     if (byte_swap) {
4568       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4569       __ movd(fptemp, CpuRegister(RAX), is64bit);
4570     }
4571     // Perform the FP addition and move the sum to a temporary GP register to prepare for CMPXCHG.
4572     if (is64bit) {
4573       __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
4574     } else {
4575       __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
4576     }
4577     __ movd(temp, fptemp, is64bit);
4578     // If necessary, byte swap the new value in the temporary and the expected value in RAX before CMPXCHG.
4579     if (byte_swap) {
4580       codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
4581       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4582     }
4583     if (is64bit) {
4584       __ LockCmpxchgq(field_addr, temp);
4585     } else {
4586       __ LockCmpxchgl(field_addr, temp);
4587     }
4588 
4589     __ j(kNotZero, &retry);
4590 
4591     // The old value is in RAX, byte swap if necessary.
4592     if (byte_swap) {
4593       codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4594     }
4595     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
4596   } else {
4597     if (byte_swap) {
4598       // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
4599       // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
4600       // implementation that is also used for bitwise operations.
4601       // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
4602       DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
4603       Location temp = locations->GetTemp(temp_count - 2);
4604       codegen->Move(temp, value);
4605       GenerateVarHandleGetAndOp(
4606           invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
4607     } else {
4608       // `getAndAdd` for integral types: XADD atomically adds the new value to the field and
4609       // places the old field value in the register holding the new value, which is also the
4610       // output register. Sign- or zero-extend the result as needed.
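      // Editorial sketch (pseudo-assembly, not verbatim output) of the fast path for an int field:
      //
      //   lock xadd [field], out   ; atomically: [field] = old + new, out = old
      //
      // where `out` initially holds the new value and ends up holding the old field value.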
4611       CpuRegister valreg = value.AsRegister<CpuRegister>();
4612       DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4613       switch (type) {
4614         case DataType::Type::kBool:
4615         case DataType::Type::kUint8:
4616           __ LockXaddb(field_addr, valreg);
4617           __ movzxb(valreg, valreg);
4618           break;
4619         case DataType::Type::kInt8:
4620           __ LockXaddb(field_addr, valreg);
4621           __ movsxb(valreg, valreg);
4622           break;
4623         case DataType::Type::kUint16:
4624           __ LockXaddw(field_addr, valreg);
4625           __ movzxw(valreg, valreg);
4626           break;
4627         case DataType::Type::kInt16:
4628           __ LockXaddw(field_addr, valreg);
4629           __ movsxw(valreg, valreg);
4630           break;
4631         case DataType::Type::kInt32:
4632         case DataType::Type::kUint32:
4633           __ LockXaddl(field_addr, valreg);
4634           break;
4635         case DataType::Type::kInt64:
4636         case DataType::Type::kUint64:
4637           __ LockXaddq(field_addr, valreg);
4638           break;
4639         default:
4640           DCHECK(false) << "unexpected type in getAndAdd intrinsic";
4641           UNREACHABLE();
4642       }
4643     }
4644   }
4645 }
4646 
4647 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4648                                           CodeGeneratorX86_64* codegen,
4649                                           GetAndUpdateOp get_and_update_op,
4650                                           bool need_any_store_barrier,
4651                                           bool need_any_any_barrier,
4652                                           bool byte_swap = false) {
4653   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4654 
4655   X86_64Assembler* assembler = codegen->GetAssembler();
4656   LocationSummary* locations = invoke->GetLocations();
4657 
4658   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4659   Location value = locations->InAt(number_of_arguments - 1);
4660   DataType::Type type = invoke->GetType();
4661 
4662   VarHandleSlowPathX86_64* slow_path = nullptr;
4663   VarHandleTarget target = GetVarHandleTarget(invoke);
4664   if (!byte_swap) {
4665     slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4666     GenerateVarHandleTarget(invoke, target, codegen);
4667     if (slow_path != nullptr) {
4668       slow_path->SetGetAndUpdateOp(get_and_update_op);
4669       slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
4670       slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
4671       __ Bind(slow_path->GetNativeByteOrderLabel());
4672     }
4673   }
4674 
4675   CpuRegister ref(target.object);
4676   Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
4677 
4678   if (need_any_store_barrier) {
4679     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4680   }
4681 
4682   switch (get_and_update_op) {
4683     case GetAndUpdateOp::kSet:
4684       GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
4685       break;
4686     case GetAndUpdateOp::kAdd:
4687       GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
4688       break;
4689     case GetAndUpdateOp::kBitwiseAnd:
4690     case GetAndUpdateOp::kBitwiseOr:
4691     case GetAndUpdateOp::kBitwiseXor:
4692       GenerateVarHandleGetAndOp(
4693           invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
4694       break;
4695   }
4696 
4697   if (need_any_any_barrier) {
4698     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4699   }
4700 
4701   if (slow_path != nullptr) {
4702     DCHECK(!byte_swap);
4703     __ Bind(slow_path->GetExitLabel());
4704   }
4705 }
4706 
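// Editorial summary of the visitors below (from their per-visitor comments): the plain volatile
// accessors (getAndSet, getAndAdd, getAndBitwise*) request both the kAnyStore and kAnyAny
// barriers, the *Release variants request only kAnyStore, and the *Acquire variants request no
// explicit barriers.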
4707 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4708   CreateVarHandleGetAndSetLocations(invoke);
4709 }
4710 
4711 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4712   // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4713   GenerateVarHandleGetAndUpdate(invoke,
4714                                 codegen_,
4715                                 GetAndUpdateOp::kSet,
4716                                 /*need_any_store_barrier=*/ true,
4717                                 /*need_any_any_barrier=*/ true);
4718 }
4719 
4720 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4721   CreateVarHandleGetAndSetLocations(invoke);
4722 }
4723 
4724 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4725   // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4726   GenerateVarHandleGetAndUpdate(invoke,
4727                                 codegen_,
4728                                 GetAndUpdateOp::kSet,
4729                                 /*need_any_store_barrier=*/ false,
4730                                 /*need_any_any_barrier=*/ false);
4731 }
4732 
4733 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4734   CreateVarHandleGetAndSetLocations(invoke);
4735 }
4736 
4737 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4738   // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4739   GenerateVarHandleGetAndUpdate(invoke,
4740                                 codegen_,
4741                                 GetAndUpdateOp::kSet,
4742                                 /*need_any_store_barrier=*/ true,
4743                                 /*need_any_any_barrier=*/ false);
4744 }
4745 
4746 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4747   CreateVarHandleGetAndAddLocations(invoke);
4748 }
4749 
4750 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4751   // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4752   GenerateVarHandleGetAndUpdate(invoke,
4753                                 codegen_,
4754                                 GetAndUpdateOp::kAdd,
4755                                 /*need_any_store_barrier=*/ true,
4756                                 /*need_any_any_barrier=*/ true);
4757 }
4758 
4759 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4760   CreateVarHandleGetAndAddLocations(invoke);
4761 }
4762 
4763 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4764   // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4765   GenerateVarHandleGetAndUpdate(invoke,
4766                                 codegen_,
4767                                 GetAndUpdateOp::kAdd,
4768                                 /*need_any_store_barrier=*/ false,
4769                                 /*need_any_any_barrier=*/ false);
4770 }
4771 
4772 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4773   CreateVarHandleGetAndAddLocations(invoke);
4774 }
4775 
4776 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4777   // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4778   GenerateVarHandleGetAndUpdate(invoke,
4779                                 codegen_,
4780                                 GetAndUpdateOp::kAdd,
4781                                 /*need_any_store_barrier=*/ true,
4782                                 /*need_any_any_barrier=*/ false);
4783 }
4784 
4785 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4786   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4787 }
4788 
4789 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4790   // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4791   GenerateVarHandleGetAndUpdate(invoke,
4792                                 codegen_,
4793                                 GetAndUpdateOp::kBitwiseAnd,
4794                                 /*need_any_store_barrier=*/ true,
4795                                 /*need_any_any_barrier=*/ true);
4796 }
4797 
4798 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4799   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4800 }
4801 
4802 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4803   // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4804   GenerateVarHandleGetAndUpdate(invoke,
4805                                 codegen_,
4806                                 GetAndUpdateOp::kBitwiseAnd,
4807                                 /*need_any_store_barrier=*/ false,
4808                                 /*need_any_any_barrier=*/ false);
4809 }
4810 
4811 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4812   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4813 }
4814 
4815 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4816   // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4817   GenerateVarHandleGetAndUpdate(invoke,
4818                                 codegen_,
4819                                 GetAndUpdateOp::kBitwiseAnd,
4820                                 /*need_any_store_barrier=*/ true,
4821                                 /*need_any_any_barrier=*/ false);
4822 }
4823 
4824 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4825   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4826 }
4827 
4828 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4829   // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4830   GenerateVarHandleGetAndUpdate(invoke,
4831                                 codegen_,
4832                                 GetAndUpdateOp::kBitwiseOr,
4833                                 /*need_any_store_barrier=*/ true,
4834                                 /*need_any_any_barrier=*/ true);
4835 }
4836 
4837 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4838   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4839 }
4840 
4841 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4842   // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4843   GenerateVarHandleGetAndUpdate(invoke,
4844                                 codegen_,
4845                                 GetAndUpdateOp::kBitwiseOr,
4846                                 /*need_any_store_barrier=*/ false,
4847                                 /*need_any_any_barrier=*/ false);
4848 }
4849 
4850 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4851   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4852 }
4853 
4854 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4855   // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4856   GenerateVarHandleGetAndUpdate(invoke,
4857                                 codegen_,
4858                                 GetAndUpdateOp::kBitwiseOr,
4859                                 /*need_any_store_barrier=*/ true,
4860                                 /*need_any_any_barrier=*/ false);
4861 }
4862 
4863 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4864   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4865 }
4866 
4867 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4868   // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4869   GenerateVarHandleGetAndUpdate(invoke,
4870                                 codegen_,
4871                                 GetAndUpdateOp::kBitwiseXor,
4872                                 /*need_any_store_barrier=*/ true,
4873                                 /*need_any_any_barrier=*/ true);
4874 }
4875 
4876 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4877   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4878 }
4879 
4880 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4881   // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4882   GenerateVarHandleGetAndUpdate(invoke,
4883                                 codegen_,
4884                                 GetAndUpdateOp::kBitwiseXor,
4885                                 /*need_any_store_barrier=*/ false,
4886                                 /*need_any_any_barrier=*/ false);
4887 }
4888 
4889 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4890   CreateVarHandleGetAndBitwiseOpLocations(invoke);
4891 }
4892 
4893 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4894   // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4895   GenerateVarHandleGetAndUpdate(invoke,
4896                                 codegen_,
4897                                 GetAndUpdateOp::kBitwiseXor,
4898                                 /*need_any_store_barrier=*/ true,
4899                                 /*need_any_any_barrier=*/ false);
4900 }
4901 
4902 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
4903   DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
4904   X86_64Assembler* assembler = codegen->GetAssembler();
4905 
4906   HInvoke* invoke = GetInvoke();
4907   LocationSummary* locations = invoke->GetLocations();
4908   mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
4909   DataType::Type value_type =
4910       GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4911   DCHECK_NE(value_type, DataType::Type::kReference);
4912   size_t size = DataType::Size(value_type);
4913   DCHECK_GT(size, 1u);
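  // Note (editorial): byte array views exist only for element types wider than one byte (short,
  // char, int, long, float, double per MethodHandles.byteArrayViewVarHandle()), hence the check.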
4914 
4915   CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4916   CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
4917   CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4918   CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
4919 
4920   MemberOffset class_offset = mirror::Object::ClassOffset();
4921   MemberOffset array_length_offset = mirror::Array::LengthOffset();
4922   MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
4923   MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
4924 
4925   VarHandleTarget target = GetVarHandleTarget(invoke);
4926 
4927   __ Bind(GetByteArrayViewCheckLabel());
4928 
4929   // The main path checked that coordinateType0 is an array class that matches the class
4930   // of the actual coordinate argument but whose component type does not match the value type.
4931   // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
4932   codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
4933   assembler->MaybePoisonHeapReference(temp);
4934   __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
4935   __ j(kNotEqual, GetEntryLabel());
4936 
4937   // Check for array index out of bounds.
4938   __ movl(temp, Address(object, array_length_offset.Int32Value()));
4939   // SUB sets flags in the same way as CMP.
4940   __ subl(temp, index);
4941   __ j(kBelowEqual, GetEntryLabel());
4942   // The difference between the array length and the index must be at least the `value_type` size.
4943   __ cmpl(temp, Immediate(size));
4944   __ j(kBelow, GetEntryLabel());
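  // Worked example (editorial): for a 4-byte view over a byte[] of length 10, temp = 10 - index
  // must be at least 4, so indices 0..6 are accepted and index 7 (only 3 bytes left) goes to the
  // runtime.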
4945 
4946   // Construct the target.
4947   __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
4948 
4949   // Alignment check. For unaligned access, go to the runtime.
4950   DCHECK(IsPowerOfTwo(size));
4951   __ testl(CpuRegister(target.offset), Immediate(size - 1u));
4952   __ j(kNotZero, GetEntryLabel());
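  // Example (editorial): for a 4-byte view, the access is aligned only if the two low bits of
  // target.offset are zero; testing against the mask size - 1 (here 3) checks exactly that.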
4953 
4954   // Byte order check. For native byte order, return to the main path.
4955   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
4956       IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
4957     // There is no reason to differentiate between native byte order and byte-swap
4958     // for setting a zero bit pattern. Just return to the main path.
4959     __ jmp(GetNativeByteOrderLabel());
4960     return;
4961   }
4962   __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
4963   __ j(kNotEqual, GetNativeByteOrderLabel());
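  // A non-zero nativeByteOrder field means the view already uses the platform byte order, so
  // control returns to the main path; otherwise fall through to the byte-swapping code below.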
4964 
4965   switch (access_mode_template) {
4966     case mirror::VarHandle::AccessModeTemplate::kGet:
4967       GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
4968       break;
4969     case mirror::VarHandle::AccessModeTemplate::kSet:
4970       GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
4971       break;
4972     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
4973       GenerateVarHandleCompareAndSetOrExchange(
4974           invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
4975       break;
4976     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
4977       GenerateVarHandleCompareAndSetOrExchange(
4978           invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
4979       break;
4980     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
4981       GenerateVarHandleGetAndUpdate(invoke,
4982                                     codegen,
4983                                     get_and_update_op_,
4984                                     need_any_store_barrier_,
4985                                     need_any_any_barrier_,
4986                                     /*byte_swap=*/ true);
4987       break;
4988   }
4989 
4990   __ jmp(GetExitLabel());
4991 }
4992 
4993 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
4994 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
4995 #undef MARK_UNIMPLEMENTED
4996 
4997 UNREACHABLE_INTRINSICS(X86_64)
4998 
4999 #undef __
5000 
5001 }  // namespace x86_64
5002 }  // namespace art
5003