/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
  : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    case DataType::Type::kInt64:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}
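
// Note on the kInt16 case above: bswapl reverses all four bytes of the 32-bit
// register, leaving the byte-swapped short in the upper 16 bits; the arithmetic
// shift right by 16 then moves it back down and sign-extends it, matching the
// short return type of Short.reverseBytes. For example, 0x00001234 becomes
// 0x34120000 after bswapl and 0x00003412 after the shift, while 0x00000080
// becomes 0xFFFF8000 (-32768 as a short).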

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary.  This will avoid a
  // temporary.
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}
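
// The masks used above (0x7FFFFFFFFFFFFFFF for double, 0x7FFFFFFF for float)
// clear only the IEEE-754 sign bit, so the andpd/andps computes the absolute
// value in a single bitwise operation; -0.0 maps to +0.0 and NaN inputs stay
// NaN, which matches the Math.abs contract.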

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}
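
// Worked example of the branch-free abs above (illustrative values): the
// arithmetic shift replicates the sign bit, so mask is 0 for non-negative
// inputs and -1 (all ones) for negative ones. With out = -5: out + mask = -6,
// and -6 ^ -1 = 5. For non-negative inputs the add and xor are no-ops. As with
// Math.abs, the most negative value maps to itself on overflow.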

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}
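
// Note: after the cmp above, the cmov takes op2 only when op1 is not already
// the desired extreme: kGreater for min (op1 > op2, so take op2) and kLess for
// max (op1 < op2, so take op2). The final argument selects the 64-bit form of
// cmov for the long variants.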

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(allocator, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations =
        new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0f;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        DataType::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, element_size * constant + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (dst_pos.IsConstant()) {
    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
  } else {
    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_end, Address(src_base, element_size * constant));
  } else {
    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
  }
}
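
// Illustrative example (assumed values, with heap references taking 4 bytes so
// element_size = 4 and, say, data_offset = 12): for src_pos = 2 and
// copy_length = 3 the code computes src_base = src + 12 + 2 * 4 and
// src_end = src_base + 3 * 4, i.e. the half-open byte range
// [src_base, src_end) covering exactly the elements to be copied.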
1158 
VisitSystemArrayCopy(HInvoke * invoke)1159 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1160   // The only read barrier implementation supporting the
1161   // SystemArrayCopy intrinsic is the Baker-style read barriers.
1162   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1163 
1164   X86_64Assembler* assembler = GetAssembler();
1165   LocationSummary* locations = invoke->GetLocations();
1166 
1167   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1168   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1169   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1170   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1171   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1172 
1173   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
1174   Location src_pos = locations->InAt(1);
1175   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
1176   Location dest_pos = locations->InAt(3);
1177   Location length = locations->InAt(4);
1178   Location temp1_loc = locations->GetTemp(0);
1179   CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
1180   Location temp2_loc = locations->GetTemp(1);
1181   CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
1182   Location temp3_loc = locations->GetTemp(2);
1183   CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
1184   Location TMP_loc = Location::RegisterLocation(TMP);
1185 
1186   SlowPathCode* intrinsic_slow_path =
1187       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1188   codegen_->AddSlowPath(intrinsic_slow_path);
1189 
1190   NearLabel conditions_on_positions_validated;
1191   SystemArrayCopyOptimizations optimizations(invoke);
1192 
1193   // If source and destination are the same, we go to slow path if we need to do
1194   // forward copying.
1195   if (src_pos.IsConstant()) {
1196     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1197     if (dest_pos.IsConstant()) {
1198       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1199       if (optimizations.GetDestinationIsSource()) {
1200         // Checked when building locations.
1201         DCHECK_GE(src_pos_constant, dest_pos_constant);
1202       } else if (src_pos_constant < dest_pos_constant) {
1203         __ cmpl(src, dest);
1204         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1205       }
1206     } else {
1207       if (!optimizations.GetDestinationIsSource()) {
1208         __ cmpl(src, dest);
1209         __ j(kNotEqual, &conditions_on_positions_validated);
1210       }
1211       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
1212       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
1213     }
1214   } else {
1215     if (!optimizations.GetDestinationIsSource()) {
1216       __ cmpl(src, dest);
1217       __ j(kNotEqual, &conditions_on_positions_validated);
1218     }
1219     if (dest_pos.IsConstant()) {
1220       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1221       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
1222       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1223     } else {
1224       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
1225       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1226     }
1227   }
1228 
1229   __ Bind(&conditions_on_positions_validated);
1230 
1231   if (!optimizations.GetSourceIsNotNull()) {
1232     // Bail out if the source is null.
1233     __ testl(src, src);
1234     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1235   }
1236 
1237   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1238     // Bail out if the destination is null.
1239     __ testl(dest, dest);
1240     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1241   }
1242 
1243   // If the length is negative, bail out.
1244   // We have already checked in the LocationsBuilder for the constant case.
1245   if (!length.IsConstant() &&
1246       !optimizations.GetCountIsSourceLength() &&
1247       !optimizations.GetCountIsDestinationLength()) {
1248     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1249     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1250   }
1251 
1252   // Validity checks: source.
1253   CheckPosition(assembler,
1254                 src_pos,
1255                 src,
1256                 length,
1257                 intrinsic_slow_path,
1258                 temp1,
1259                 optimizations.GetCountIsSourceLength());
1260 
1261   // Validity checks: dest.
1262   CheckPosition(assembler,
1263                 dest_pos,
1264                 dest,
1265                 length,
1266                 intrinsic_slow_path,
1267                 temp1,
1268                 optimizations.GetCountIsDestinationLength());
1269 
1270   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1271     // Check whether all elements of the source array are assignable to the component
1272     // type of the destination array. We do two checks: the classes are the same,
1273     // or the destination is Object[]. If none of these checks succeed, we go to the
1274     // slow path.
1275 
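    // Editorial sketch of the type check performed below (pseudocode, not generated
    // code; the names mirror the class pointers loaded into `temp1`/`temp2`):
    //
    //   if (dest->klass_ != src->klass_) {
    //     // Only acceptable if dest is Object[], i.e. dest's component type is
    //     // java.lang.Object, detected below via its null superclass.
    //     if (!destination_is_object_array) goto intrinsic_slow_path;
    //   }
    //   // When not statically known, both classes are additionally checked to be
    //   // non-primitive arrays (component type != null and not a primitive type).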
1276     bool did_unpoison = false;
1277     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1278       // /* HeapReference<Class> */ temp1 = dest->klass_
1279       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1280           invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
1281       // Register `temp1` is not trashed by the read barrier emitted
1282       // by GenerateFieldLoadWithBakerReadBarrier below, as that
1283       // method produces a call to a ReadBarrierMarkRegX entry point,
1284       // which saves all potentially live registers, including
1285       // temporaries such as `temp1`.
1286       // /* HeapReference<Class> */ temp2 = src->klass_
1287       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1288           invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
1289       // If heap poisoning is enabled, `temp1` and `temp2` have been
1290       // unpoisoned by the previous calls to
1291       // GenerateFieldLoadWithBakerReadBarrier.
1292     } else {
1293       // /* HeapReference<Class> */ temp1 = dest->klass_
1294       __ movl(temp1, Address(dest, class_offset));
1295       // /* HeapReference<Class> */ temp2 = src->klass_
1296       __ movl(temp2, Address(src, class_offset));
1297       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1298           !optimizations.GetSourceIsNonPrimitiveArray()) {
1299         // One or two of the references need to be unpoisoned. Unpoison them
1300         // both to make the identity check valid.
1301         __ MaybeUnpoisonHeapReference(temp1);
1302         __ MaybeUnpoisonHeapReference(temp2);
1303         did_unpoison = true;
1304       }
1305     }
1306 
1307     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1308       // Bail out if the destination is not a non-primitive array.
1309       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1310         // /* HeapReference<Class> */ TMP = temp1->component_type_
1311         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1312             invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
1313         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1314         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1315         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1316         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1317       } else {
1318         // /* HeapReference<Class> */ TMP = temp1->component_type_
1319         __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1320         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1321         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1322         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1323       }
1324       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1325       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1326     }
1327 
1328     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1329       // Bail out if the source is not a non-primitive array.
1330       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1331         // For the same reason given earlier, `temp1` is not trashed by the
1332         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1333         // /* HeapReference<Class> */ TMP = temp2->component_type_
1334         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1335             invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
1336         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1337         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1338         // If heap poisoning is enabled, `TMP` has been unpoisoned by
1339         // the previous call to GenerateFieldLoadWithBakerReadBarrier.
1340       } else {
1341         // /* HeapReference<Class> */ TMP = temp2->component_type_
1342         __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1343         __ testl(CpuRegister(TMP), CpuRegister(TMP));
1344         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1345         __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1346       }
1347       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1348       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1349     }
1350 
1351     __ cmpl(temp1, temp2);
1352 
1353     if (optimizations.GetDestinationIsTypedObjectArray()) {
1354       NearLabel do_copy;
1355       __ j(kEqual, &do_copy);
1356       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1357         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1358         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1359             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
1360         // We do not need to emit a read barrier for the following
1361         // heap reference load, as `temp1` is only used in a
1362         // comparison with null below, and this reference is not
1363         // kept afterwards.
1364         __ cmpl(Address(temp1, super_offset), Immediate(0));
1365       } else {
1366         if (!did_unpoison) {
1367           __ MaybeUnpoisonHeapReference(temp1);
1368         }
1369         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1370         __ movl(temp1, Address(temp1, component_offset));
1371         __ MaybeUnpoisonHeapReference(temp1);
1372         // No need to unpoison the following heap reference load, as
1373         // we're comparing against null.
1374         __ cmpl(Address(temp1, super_offset), Immediate(0));
1375       }
1376       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1377       __ Bind(&do_copy);
1378     } else {
1379       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1380     }
1381   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1382     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1383     // Bail out if the source is not a non-primitive array.
1384     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1385       // /* HeapReference<Class> */ temp1 = src->klass_
1386       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1387           invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
1388       // /* HeapReference<Class> */ TMP = temp1->component_type_
1389       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1390           invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
1391       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1392       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1393     } else {
1394       // /* HeapReference<Class> */ temp1 = src->klass_
1395       __ movl(temp1, Address(src, class_offset));
1396       __ MaybeUnpoisonHeapReference(temp1);
1397       // /* HeapReference<Class> */ TMP = temp1->component_type_
1398       __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1399       // No need to unpoison `TMP` now, as we're comparing against null.
1400       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1401       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1402       __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1403     }
1404     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1405     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1406   }
1407 
1408   const DataType::Type type = DataType::Type::kReference;
1409   const int32_t element_size = DataType::Size(type);
1410 
1411   // Compute base source address, base destination address, and end
1412   // source address in `temp1`, `temp2` and `temp3` respectively.
1413   GenSystemArrayCopyAddresses(
1414       GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
1415 
1416   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1417     // SystemArrayCopy implementation for Baker read barriers (see
1418     // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1419     //
1420     //   if (src_ptr != end_ptr) {
1421     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
1422     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
1423     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
1424     //     if (is_gray) {
1425     //       // Slow-path copy.
1426     //       do {
1427     //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1428     //       } while (src_ptr != end_ptr)
1429     //     } else {
1430     //       // Fast-path copy.
1431     //       do {
1432     //         *dest_ptr++ = *src_ptr++;
1433     //       } while (src_ptr != end_ptr)
1434     //     }
1435     //   }
1436 
1437     NearLabel loop, done;
1438 
1439     // Don't enter copy loop if `length == 0`.
1440     __ cmpl(temp1, temp3);
1441     __ j(kEqual, &done);
1442 
1443     // Given the numeric representation, it's enough to check the low bit of the rb_state.
1444     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1445     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1446     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1447     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1448     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1449 
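    // Worked example (editorial, assuming the LockWord layout where
    // kReadBarrierStateShift == 28): gray_byte_position == 28 / 8 == 3 and
    // gray_bit_position == 28 % 8 == 4, so test_value == 0x10 and the testb below
    // reads only the byte at monitor_offset + 3 and tests bit 4, i.e. the read
    // barrier state bit of the 32-bit lock word.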
1450     // if (rb_state == ReadBarrier::GrayState())
1451     //   goto slow_path;
1452     // At this point, just do the "if" and make sure that flags are preserved until the branch.
1453     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1454 
1455     // Load fence to prevent load-load reordering.
1456     // Note that this is a no-op, thanks to the x86-64 memory model.
1457     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1458 
1459     // Slow path used to copy array when `src` is gray.
1460     SlowPathCode* read_barrier_slow_path =
1461         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1462     codegen_->AddSlowPath(read_barrier_slow_path);
1463 
1464     // We have done the "if" of the gray bit check above, now branch based on the flags.
1465     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1466 
1467     // Fast-path copy.
1468     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1469     // poison/unpoison.
1470     __ Bind(&loop);
1471     __ movl(CpuRegister(TMP), Address(temp1, 0));
1472     __ movl(Address(temp2, 0), CpuRegister(TMP));
1473     __ addl(temp1, Immediate(element_size));
1474     __ addl(temp2, Immediate(element_size));
1475     __ cmpl(temp1, temp3);
1476     __ j(kNotEqual, &loop);
1477 
1478     __ Bind(read_barrier_slow_path->GetExitLabel());
1479     __ Bind(&done);
1480   } else {
1481     // Non read barrier code.
1482 
1483     // Iterate over the arrays and do a raw copy of the objects. We don't need to
1484     // poison/unpoison.
1485     NearLabel loop, done;
1486     __ cmpl(temp1, temp3);
1487     __ j(kEqual, &done);
1488     __ Bind(&loop);
1489     __ movl(CpuRegister(TMP), Address(temp1, 0));
1490     __ movl(Address(temp2, 0), CpuRegister(TMP));
1491     __ addl(temp1, Immediate(element_size));
1492     __ addl(temp2, Immediate(element_size));
1493     __ cmpl(temp1, temp3);
1494     __ j(kNotEqual, &loop);
1495     __ Bind(&done);
1496   }
1497 
1498   // We only need one card marking on the destination array.
1499   codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);
1500 
1501   __ Bind(intrinsic_slow_path->GetExitLabel());
1502 }
1503 
1504 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1505   LocationSummary* locations = new (allocator_) LocationSummary(
1506       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1507   InvokeRuntimeCallingConvention calling_convention;
1508   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1509   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1510   locations->SetOut(Location::RegisterLocation(RAX));
1511 }
1512 
1513 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1514   X86_64Assembler* assembler = GetAssembler();
1515   LocationSummary* locations = invoke->GetLocations();
1516 
1517   // Note that the null check must have been done earlier.
1518   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1519 
1520   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1521   __ testl(argument, argument);
1522   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1523   codegen_->AddSlowPath(slow_path);
1524   __ j(kEqual, slow_path->GetEntryLabel());
1525 
1526   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1527   __ Bind(slow_path->GetExitLabel());
1528 }
1529 
1530 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1531   if (kEmitCompilerReadBarrier &&
1532       !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
1533       !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
1534     // No support for this odd case (String class is moveable, not in the boot image).
1535     return;
1536   }
1537 
1538   LocationSummary* locations =
1539       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1540   locations->SetInAt(0, Location::RequiresRegister());
1541   locations->SetInAt(1, Location::RequiresRegister());
1542 
1543   // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
1544   locations->AddTemp(Location::RegisterLocation(RCX));
1545   locations->AddTemp(Location::RegisterLocation(RDI));
1546 
1547   // Set output; RSI is needed for the repe_cmpsq instruction anyway.
1548   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1549 }
1550 
1551 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1552   X86_64Assembler* assembler = GetAssembler();
1553   LocationSummary* locations = invoke->GetLocations();
1554 
1555   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1556   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1557   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1558   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1559   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1560 
1561   NearLabel end, return_true, return_false;
1562 
1563   // Get offsets of count, value, and class fields within a string object.
1564   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1565   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1566   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1567 
1568   // Note that the null check must have been done earlier.
1569   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1570 
1571   StringEqualsOptimizations optimizations(invoke);
1572   if (!optimizations.GetArgumentNotNull()) {
1573     // Check if input is null, return false if it is.
1574     __ testl(arg, arg);
1575     __ j(kEqual, &return_false);
1576   }
1577 
1578   if (!optimizations.GetArgumentIsString()) {
1579     // Instanceof check for the argument by comparing class fields.
1580     // All string objects must have the same type since String cannot be subclassed.
1581     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1582     // If the argument is a string object, its class field must be equal to receiver's class field.
1583     __ movl(rcx, Address(str, class_offset));
1584     __ cmpl(rcx, Address(arg, class_offset));
1585     __ j(kNotEqual, &return_false);
1586   }
1587 
1588   // Reference equality check, return true if same reference.
1589   __ cmpl(str, arg);
1590   __ j(kEqual, &return_true);
1591 
1592   // Load length and compression flag of receiver string.
1593   __ movl(rcx, Address(str, count_offset));
1594   // Check if lengths and compression flags are equal, return false if they're not.
1595   // Two identical strings will always have same compression style since
1596   // compression style is decided on alloc.
1597   __ cmpl(rcx, Address(arg, count_offset));
1598   __ j(kNotEqual, &return_false);
1599   // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1600   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1601                 "Expecting 0=compressed, 1=uncompressed");
1602   __ jrcxz(&return_true);
1603 
1604   if (mirror::kUseStringCompression) {
1605     NearLabel string_uncompressed;
1606     // Extract the length and differentiate between both-compressed and both-uncompressed.
1607     // The mixed-compression case was already rejected by the count comparison above.
1608     __ shrl(rcx, Immediate(1));
1609     __ j(kCarrySet, &string_uncompressed);
1610     // Divide string length by 2, rounding up, and continue as if uncompressed.
1611     // Merge clearing the compression flag with +1 for rounding.
1612     __ addl(rcx, Immediate(1));
1613     __ shrl(rcx, Immediate(1));
1614     __ Bind(&string_uncompressed);
1615   }
1616   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1617   __ leal(rsi, Address(str, value_offset));
1618   __ leal(rdi, Address(arg, value_offset));
1619 
1620   // Divide string length by 4 and adjust for lengths not divisible by 4.
1621   __ addl(rcx, Immediate(3));
1622   __ shrl(rcx, Immediate(2));
1623 
1624   // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1625   // or 8 characters (compressed) at a time.
1626   DCHECK_ALIGNED(value_offset, 8);
1627   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1628 
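  // Worked example (editorial, not generated code): for an uncompressed string of
  // length 5, RCX held 5 and the adjustment above leaves (5 + 3) >> 2 == 2 quadwords,
  // i.e. 8 chars are compared; the extra 3 chars are the zero padding guaranteed by
  // the 8-byte object alignment asserted above. For a compressed string of length 5,
  // RCX was first rounded up to ceil(5 / 2) == 3, so (3 + 3) >> 2 == 1 quadword
  // (8 bytes) covers the 5 data bytes plus padding.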
1629   // Loop to compare strings four characters at a time starting at the beginning of the string.
1630   __ repe_cmpsq();
1631   // If strings are not equal, zero flag will be cleared.
1632   __ j(kNotEqual, &return_false);
1633 
1634   // Return true and exit the function.
1635   // If loop does not result in returning false, we return true.
1636   __ Bind(&return_true);
1637   __ movl(rsi, Immediate(1));
1638   __ jmp(&end);
1639 
1640   // Return false and exit the function.
1641   __ Bind(&return_false);
1642   __ xorl(rsi, rsi);
1643   __ Bind(&end);
1644 }
1645 
1646 static void CreateStringIndexOfLocations(HInvoke* invoke,
1647                                          ArenaAllocator* allocator,
1648                                          bool start_at_zero) {
1649   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1650                                                                LocationSummary::kCallOnSlowPath,
1651                                                                kIntrinsified);
1652   // The data needs to be in RDI for scasw. So request that the string is there, anyway.
1653   locations->SetInAt(0, Location::RegisterLocation(RDI));
1654   // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1655   // allocator to do that, anyway. We can still do the constant check by checking the parameter
1656   // of the instruction explicitly.
1657   // Note: This works as we don't clobber RAX anywhere.
1658   locations->SetInAt(1, Location::RegisterLocation(RAX));
1659   if (!start_at_zero) {
1660     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1661   }
1662   // As we clobber RDI during execution anyway, also use it as the output.
1663   locations->SetOut(Location::SameAsFirstInput());
1664 
1665   // repne scasw uses RCX as the counter.
1666   locations->AddTemp(Location::RegisterLocation(RCX));
1667   // Need another temporary to be able to compute the result.
1668   locations->AddTemp(Location::RequiresRegister());
1669 }
1670 
1671 static void GenerateStringIndexOf(HInvoke* invoke,
1672                                   X86_64Assembler* assembler,
1673                                   CodeGeneratorX86_64* codegen,
1674                                   bool start_at_zero) {
1675   LocationSummary* locations = invoke->GetLocations();
1676 
1677   // Note that the null check must have been done earlier.
1678   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1679 
1680   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1681   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1682   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1683   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1684   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1685 
1686   // Check our assumptions for registers.
1687   DCHECK_EQ(string_obj.AsRegister(), RDI);
1688   DCHECK_EQ(search_value.AsRegister(), RAX);
1689   DCHECK_EQ(counter.AsRegister(), RCX);
1690   DCHECK_EQ(out.AsRegister(), RDI);
1691 
1692   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1693   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1694   SlowPathCode* slow_path = nullptr;
1695   HInstruction* code_point = invoke->InputAt(1);
1696   if (code_point->IsIntConstant()) {
1697     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1698     std::numeric_limits<uint16_t>::max()) {
1699       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1700       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1701       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1702       codegen->AddSlowPath(slow_path);
1703       __ jmp(slow_path->GetEntryLabel());
1704       __ Bind(slow_path->GetExitLabel());
1705       return;
1706     }
1707   } else if (code_point->GetType() != DataType::Type::kUint16) {
1708     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1709     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1710     codegen->AddSlowPath(slow_path);
1711     __ j(kAbove, slow_path->GetEntryLabel());
1712   }
1713 
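  // Editorial sketch of the check above (pseudocode; `ch` stands for the searched
  // code point): a value above 0xFFFF is a supplementary code point encoded as a
  // surrogate pair, which a single 16-bit scan cannot match, so it is left to the
  // runtime.
  //
  //   if (ch > 0xFFFF) goto slow_path;          // surrogate pair handling
  //   // otherwise scan for (uint16_t) ch with repne scasb/scasw below.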
1714   // From here down, we know that we are looking for a char that fits in
1715   // 16 bits (uncompressed) or 8 bits (compressed).
1716   // Location of reference to data array within the String object.
1717   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1718   // Location of count within the String object.
1719   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1720 
1721   // Load the count field of the string containing the length and compression flag.
1722   __ movl(string_length, Address(string_obj, count_offset));
1723 
1724   // Do a zero-length check. Even with string compression `count == 0` means empty.
1725   // TODO: Support jecxz.
1726   NearLabel not_found_label;
1727   __ testl(string_length, string_length);
1728   __ j(kEqual, &not_found_label);
1729 
1730   if (mirror::kUseStringCompression) {
1731     // Use TMP to keep string_length_flagged.
1732     __ movl(CpuRegister(TMP), string_length);
1733     // Mask out first bit used as compression flag.
1734     __ shrl(string_length, Immediate(1));
1735   }
1736 
1737   if (start_at_zero) {
1738     // Number of chars to scan is the same as the string length.
1739     __ movl(counter, string_length);
1740     // Move to the start of the string.
1741     __ addq(string_obj, Immediate(value_offset));
1742   } else {
1743     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1744 
1745     // Do a start_index check.
1746     __ cmpl(start_index, string_length);
1747     __ j(kGreaterEqual, &not_found_label);
1748 
1749     // Ensure we have a start index >= 0.
1750     __ xorl(counter, counter);
1751     __ cmpl(start_index, Immediate(0));
1752     __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
1753 
1754     if (mirror::kUseStringCompression) {
1755       NearLabel modify_counter, offset_uncompressed_label;
1756       __ testl(CpuRegister(TMP), Immediate(1));
1757       __ j(kNotZero, &offset_uncompressed_label);
1758       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1759       __ jmp(&modify_counter);
1760       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1761       __ Bind(&offset_uncompressed_label);
1762       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1763       __ Bind(&modify_counter);
1764     } else {
1765       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1766     }
1767     // Now update RCX, the work counter: it will be string.length - start_index.
1768     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1769     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1770   }
1771 
1772   if (mirror::kUseStringCompression) {
1773     NearLabel uncompressed_string_comparison;
1774     NearLabel comparison_done;
1775     __ testl(CpuRegister(TMP), Immediate(1));
1776     __ j(kNotZero, &uncompressed_string_comparison);
1777     // Check if RAX (search_value) is ASCII.
1778     __ cmpl(search_value, Immediate(127));
1779     __ j(kGreater, &not_found_label);
1780     // Comparing byte-per-byte.
1781     __ repne_scasb();
1782     __ jmp(&comparison_done);
1783     // Everything is set up for repne scasw:
1784     //   * Comparison address in RDI.
1785     //   * Counter in ECX.
1786     __ Bind(&uncompressed_string_comparison);
1787     __ repne_scasw();
1788     __ Bind(&comparison_done);
1789   } else {
1790     __ repne_scasw();
1791   }
1792   // Did we find a match?
1793   __ j(kNotEqual, &not_found_label);
1794 
1795   // Yes, we matched.  Compute the index of the result.
1796   __ subl(string_length, counter);
1797   __ leal(out, Address(string_length, -1));
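  // Worked example (editorial): with string length 10, start index 3 and a match at
  // absolute index 5, the scan starts with RCX == 7; repne scasw consumes 3 elements
  // and stops with RCX == 4, so string_length - RCX - 1 == 10 - 4 - 1 == 5, the index
  // returned in `out`.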
1798 
1799   NearLabel done;
1800   __ jmp(&done);
1801 
1802   // Failed to match; return -1.
1803   __ Bind(&not_found_label);
1804   __ movl(out, Immediate(-1));
1805 
1806   // And join up at the end.
1807   __ Bind(&done);
1808   if (slow_path != nullptr) {
1809     __ Bind(slow_path->GetExitLabel());
1810   }
1811 }
1812 
1813 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1814   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true);
1815 }
1816 
1817 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1818   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
1819 }
1820 
1821 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1822   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false);
1823 }
1824 
1825 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1826   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
1827 }
1828 
1829 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1830   LocationSummary* locations = new (allocator_) LocationSummary(
1831       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1832   InvokeRuntimeCallingConvention calling_convention;
1833   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1834   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1835   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1836   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1837   locations->SetOut(Location::RegisterLocation(RAX));
1838 }
1839 
1840 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1841   X86_64Assembler* assembler = GetAssembler();
1842   LocationSummary* locations = invoke->GetLocations();
1843 
1844   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1845   __ testl(byte_array, byte_array);
1846   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1847   codegen_->AddSlowPath(slow_path);
1848   __ j(kEqual, slow_path->GetEntryLabel());
1849 
1850   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1851   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1852   __ Bind(slow_path->GetExitLabel());
1853 }
1854 
1855 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1856   LocationSummary* locations =
1857       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1858   InvokeRuntimeCallingConvention calling_convention;
1859   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1860   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1861   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1862   locations->SetOut(Location::RegisterLocation(RAX));
1863 }
1864 
1865 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1866   // No need to emit code checking whether `locations->InAt(2)` is a null
1867   // pointer, as callers of the native method
1868   //
1869   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1870   //
1871   // all include a null check on `data` before calling that method.
1872   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1873   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1874 }
1875 
1876 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1877   LocationSummary* locations = new (allocator_) LocationSummary(
1878       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1879   InvokeRuntimeCallingConvention calling_convention;
1880   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1881   locations->SetOut(Location::RegisterLocation(RAX));
1882 }
1883 
1884 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1885   X86_64Assembler* assembler = GetAssembler();
1886   LocationSummary* locations = invoke->GetLocations();
1887 
1888   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1889   __ testl(string_to_copy, string_to_copy);
1890   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1891   codegen_->AddSlowPath(slow_path);
1892   __ j(kEqual, slow_path->GetEntryLabel());
1893 
1894   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1895   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1896   __ Bind(slow_path->GetExitLabel());
1897 }
1898 
1899 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1900   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1901   LocationSummary* locations =
1902       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1903   locations->SetInAt(0, Location::RequiresRegister());
1904   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1905   locations->SetInAt(2, Location::RequiresRegister());
1906   locations->SetInAt(3, Location::RequiresRegister());
1907   locations->SetInAt(4, Location::RequiresRegister());
1908 
1909   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1910   locations->AddTemp(Location::RegisterLocation(RSI));
1911   locations->AddTemp(Location::RegisterLocation(RDI));
1912   locations->AddTemp(Location::RegisterLocation(RCX));
1913 }
1914 
1915 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1916   X86_64Assembler* assembler = GetAssembler();
1917   LocationSummary* locations = invoke->GetLocations();
1918 
1919   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1920   // Location of data in char array buffer.
1921   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1922   // Location of char array data in string.
1923   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1924 
1925   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
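  // Editorial sketch of what the intrinsic implements (pseudocode, not generated code;
  // `value8`/`value16` stand for the compressed/uncompressed value storage):
  //
  //   int n = srcEnd - srcBegin;
  //   if (compressed) {
  //     for (int i = 0; i < n; ++i) dst[dstBegin + i] = (char) (value8[srcBegin + i] & 0xff);
  //   } else {
  //     memcpy(&dst[dstBegin], &value16[srcBegin], n * sizeof(uint16_t));  // rep movsw
  //   }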
1926   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1927   Location srcBegin = locations->InAt(1);
1928   int srcBegin_value =
1929     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1930   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1931   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1932   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1933 
1934   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1935   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1936   DCHECK_EQ(char_size, 2u);
1937 
1938   NearLabel done;
1939   // Compute the number of chars (words) to move.
1940   __ movl(CpuRegister(RCX), srcEnd);
1941   if (srcBegin.IsConstant()) {
1942     __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1943   } else {
1944     DCHECK(srcBegin.IsRegister());
1945     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1946   }
1947   if (mirror::kUseStringCompression) {
1948     NearLabel copy_uncompressed, copy_loop;
1949     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1950     DCHECK_EQ(c_char_size, 1u);
1951     // Location of count in string.
1952     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1953 
1954     __ testl(Address(obj, count_offset), Immediate(1));
1955     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1956                   "Expecting 0=compressed, 1=uncompressed");
1957     __ j(kNotZero, &copy_uncompressed);
1958     // Compute the address of the source string by adding the number of chars from
1959     // the source beginning to the value offset of a string.
1960     __ leaq(CpuRegister(RSI),
1961             CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1962     // Start the loop to copy String's value to Array of Char.
1963     __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1964 
1965     __ Bind(&copy_loop);
1966     __ jrcxz(&done);
1967     // Use TMP as temporary (convert byte from RSI to word).
1968     // TODO: Selecting RAX as the temporary and using LODSB/STOSW.
1969     __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1970     __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1971     __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1972     __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1973     // TODO: Add support for LOOP to X86_64Assembler.
1974     __ subl(CpuRegister(RCX), Immediate(1));
1975     __ jmp(&copy_loop);
1976 
1977     __ Bind(&copy_uncompressed);
1978   }
1979 
1980   __ leaq(CpuRegister(RSI),
1981           CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1982   // Compute the address of the destination buffer.
1983   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1984   // Do the move.
1985   __ rep_movsw();
1986 
1987   __ Bind(&done);
1988 }
1989 
1990 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1991   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1992   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1993   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1994   // to avoid a SIGBUS.
1995   switch (size) {
1996     case DataType::Type::kInt8:
1997       __ movsxb(out, Address(address, 0));
1998       break;
1999     case DataType::Type::kInt16:
2000       __ movsxw(out, Address(address, 0));
2001       break;
2002     case DataType::Type::kInt32:
2003       __ movl(out, Address(address, 0));
2004       break;
2005     case DataType::Type::kInt64:
2006       __ movq(out, Address(address, 0));
2007       break;
2008     default:
2009       LOG(FATAL) << "Type not recognized for peek: " << size;
2010       UNREACHABLE();
2011   }
2012 }
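// Worked example (editorial): a byte with value 0xFF read through the kInt8 case above
// comes back as the int -1, because movsxb sign-extends it; the narrow peeks are thus
// sign-extended to match the signed byte/short return types of the corresponding
// Memory.peek* methods (stated as an assumption about the libcore API).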
2013 
2014 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
2015   CreateIntToIntLocations(allocator_, invoke);
2016 }
2017 
2018 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
2019   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
2020 }
2021 
2022 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
2023   CreateIntToIntLocations(allocator_, invoke);
2024 }
2025 
2026 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
2027   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
2028 }
2029 
2030 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
2031   CreateIntToIntLocations(allocator_, invoke);
2032 }
2033 
2034 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
2035   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
2036 }
2037 
2038 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
2039   CreateIntToIntLocations(allocator_, invoke);
2040 }
2041 
2042 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
2043   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
2044 }
2045 
2046 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2047   LocationSummary* locations =
2048       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2049   locations->SetInAt(0, Location::RequiresRegister());
2050   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
2051 }
2052 
2053 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
2054   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
2055   Location value = locations->InAt(1);
2056   // x86 allows unaligned access. We do not have to check the input or use specific instructions
2057   // to avoid a SIGBUS.
2058   switch (size) {
2059     case DataType::Type::kInt8:
2060       if (value.IsConstant()) {
2061         __ movb(Address(address, 0),
2062                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2063       } else {
2064         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
2065       }
2066       break;
2067     case DataType::Type::kInt16:
2068       if (value.IsConstant()) {
2069         __ movw(Address(address, 0),
2070                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2071       } else {
2072         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
2073       }
2074       break;
2075     case DataType::Type::kInt32:
2076       if (value.IsConstant()) {
2077         __ movl(Address(address, 0),
2078                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
2079       } else {
2080         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
2081       }
2082       break;
2083     case DataType::Type::kInt64:
2084       if (value.IsConstant()) {
2085         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
2086         DCHECK(IsInt<32>(v));
2087         int32_t v_32 = v;
2088         __ movq(Address(address, 0), Immediate(v_32));
2089       } else {
2090         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
2091       }
2092       break;
2093     default:
2094       LOG(FATAL) << "Type not recognized for poke: " << size;
2095       UNREACHABLE();
2096   }
2097 }
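// Editorial note on the kInt64 constant case above: movq only accepts a 32-bit
// immediate that is sign-extended to 64 bits, hence the DCHECK(IsInt<32>(v)). For
// example, poking the constant 0x12345678 can be encoded directly, while a constant
// such as 0x100000000 does not fit and is expected to arrive in a register via the
// RegisterOrInt32Constant location requested above.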
2098 
2099 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
2100   CreateIntIntToVoidLocations(allocator_, invoke);
2101 }
2102 
2103 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
2104   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
2105 }
2106 
2107 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
2108   CreateIntIntToVoidLocations(allocator_, invoke);
2109 }
2110 
2111 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
2112   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
2113 }
2114 
2115 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
2116   CreateIntIntToVoidLocations(allocator_, invoke);
2117 }
2118 
2119 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
2120   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
2121 }
2122 
2123 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
2124   CreateIntIntToVoidLocations(allocator_, invoke);
2125 }
2126 
2127 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
2128   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
2129 }
2130 
2131 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
2132   LocationSummary* locations =
2133       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2134   locations->SetOut(Location::RequiresRegister());
2135 }
2136 
2137 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
2138   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
2139   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
2140                                                     /* no_rip */ true));
2141 }
2142 
2143 static void GenUnsafeGet(HInvoke* invoke,
2144                          DataType::Type type,
2145                          bool is_volatile ATTRIBUTE_UNUSED,
2146                          CodeGeneratorX86_64* codegen) {
2147   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2148   LocationSummary* locations = invoke->GetLocations();
2149   Location base_loc = locations->InAt(1);
2150   CpuRegister base = base_loc.AsRegister<CpuRegister>();
2151   Location offset_loc = locations->InAt(2);
2152   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
2153   Location output_loc = locations->Out();
2154   CpuRegister output = output_loc.AsRegister<CpuRegister>();
2155 
2156   switch (type) {
2157     case DataType::Type::kInt32:
2158       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2159       break;
2160 
2161     case DataType::Type::kReference: {
2162       if (kEmitCompilerReadBarrier) {
2163         if (kUseBakerReadBarrier) {
2164           Address src(base, offset, ScaleFactor::TIMES_1, 0);
2165           codegen->GenerateReferenceLoadWithBakerReadBarrier(
2166               invoke, output_loc, base, src, /* needs_null_check */ false);
2167         } else {
2168           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2169           codegen->GenerateReadBarrierSlow(
2170               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2171         }
2172       } else {
2173         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2174         __ MaybeUnpoisonHeapReference(output);
2175       }
2176       break;
2177     }
2178 
2179     case DataType::Type::kInt64:
2180       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2181       break;
2182 
2183     default:
2184       LOG(FATAL) << "Unsupported op size " << type;
2185       UNREACHABLE();
2186   }
2187 }
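// Editorial note: `is_volatile` is unused above because ordinary x86-64 loads already
// provide the acquire ordering a volatile get needs, so the volatile and non-volatile
// variants share the same movl/movq; only the reference case differs, where the loaded
// value additionally goes through a read barrier or heap reference unpoisoning.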
2188 
2189 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2190   bool can_call = kEmitCompilerReadBarrier &&
2191       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2192        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2193   LocationSummary* locations =
2194       new (allocator) LocationSummary(invoke,
2195                                       can_call
2196                                           ? LocationSummary::kCallOnSlowPath
2197                                           : LocationSummary::kNoCall,
2198                                       kIntrinsified);
2199   if (can_call && kUseBakerReadBarrier) {
2200     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2201   }
2202   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2203   locations->SetInAt(1, Location::RequiresRegister());
2204   locations->SetInAt(2, Location::RequiresRegister());
2205   locations->SetOut(Location::RequiresRegister(),
2206                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2207 }
2208 
2209 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
2210   CreateIntIntIntToIntLocations(allocator_, invoke);
2211 }
2212 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2213   CreateIntIntIntToIntLocations(allocator_, invoke);
2214 }
2215 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2216   CreateIntIntIntToIntLocations(allocator_, invoke);
2217 }
2218 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2219   CreateIntIntIntToIntLocations(allocator_, invoke);
2220 }
2221 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2222   CreateIntIntIntToIntLocations(allocator_, invoke);
2223 }
2224 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2225   CreateIntIntIntToIntLocations(allocator_, invoke);
2226 }
2227 
2228 
2229 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2230   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
2231 }
2232 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2233   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
2234 }
2235 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2236   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
2237 }
2238 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2239   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
2240 }
2241 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2242   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
2243 }
2244 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2245   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
2246 }
2247 
2248 
2249 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2250                                                        DataType::Type type,
2251                                                        HInvoke* invoke) {
2252   LocationSummary* locations =
2253       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2254   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2255   locations->SetInAt(1, Location::RequiresRegister());
2256   locations->SetInAt(2, Location::RequiresRegister());
2257   locations->SetInAt(3, Location::RequiresRegister());
2258   if (type == DataType::Type::kReference) {
2259     // Need temp registers for card-marking.
2260     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2261     locations->AddTemp(Location::RequiresRegister());
2262   }
2263 }
2264 
2265 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2266   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2267 }
2268 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2269   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2270 }
2271 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2272   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2273 }
2274 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2275   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2276 }
2277 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2278   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2279 }
2280 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2281   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2282 }
2283 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2284   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2285 }
2286 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2287   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2288 }
2289 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2290   CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2291 }
2292 
2293 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2294 // memory model.
2295 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2296                          CodeGeneratorX86_64* codegen) {
2297   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2298   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2299   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2300   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2301 
2302   if (type == DataType::Type::kInt64) {
2303     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2304   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2305     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2306     __ movl(temp, value);
2307     __ PoisonHeapReference(temp);
2308     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2309   } else {
2310     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2311   }
2312 
2313   if (is_volatile) {
2314     codegen->MemoryFence();
2315   }
2316 
2317   if (type == DataType::Type::kReference) {
2318     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2319     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2320                         locations->GetTemp(1).AsRegister<CpuRegister>(),
2321                         base,
2322                         value,
2323                         value_can_be_null);
2324   }
2325 }
2326 
VisitUnsafePut(HInvoke * invoke)2327 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2328   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
2329 }
VisitUnsafePutOrdered(HInvoke * invoke)2330 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2331   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
2332 }
VisitUnsafePutVolatile(HInvoke * invoke)2333 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2334   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_);
2335 }
VisitUnsafePutObject(HInvoke * invoke)2336 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2337   GenUnsafePut(
2338       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
2339 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2340 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2341   GenUnsafePut(
2342       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
2343 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2344 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2345   GenUnsafePut(
2346       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_);
2347 }
VisitUnsafePutLong(HInvoke * invoke)2348 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2349   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
2350 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2351 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2352   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
2353 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2354 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2355   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_);
2356 }
2357 
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2358 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2359                                        DataType::Type type,
2360                                        HInvoke* invoke) {
2361   bool can_call = kEmitCompilerReadBarrier &&
2362       kUseBakerReadBarrier &&
2363       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2364   LocationSummary* locations =
2365       new (allocator) LocationSummary(invoke,
2366                                       can_call
2367                                           ? LocationSummary::kCallOnSlowPath
2368                                           : LocationSummary::kNoCall,
2369                                       kIntrinsified);
2370   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2371   locations->SetInAt(1, Location::RequiresRegister());
2372   locations->SetInAt(2, Location::RequiresRegister());
2373   // expected value must be in EAX/RAX.
2374   locations->SetInAt(3, Location::RegisterLocation(RAX));
2375   locations->SetInAt(4, Location::RequiresRegister());
2376 
2377   locations->SetOut(Location::RequiresRegister());
2378   if (type == DataType::Type::kReference) {
2379     // Need temporary registers for card-marking, and possibly for
2380     // (Baker) read barrier.
2381     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2382     locations->AddTemp(Location::RequiresRegister());
2383   }
2384 }
2385 
VisitUnsafeCASInt(HInvoke * invoke)2386 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2387   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
2388 }
2389 
VisitUnsafeCASLong(HInvoke * invoke)2390 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2391   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
2392 }
2393 
VisitUnsafeCASObject(HInvoke * invoke)2394 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2395   // The only read barrier implementation supporting the
2396   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2397   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2398     return;
2399   }
2400 
2401   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2402 }
2403 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2404 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2405   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2406   LocationSummary* locations = invoke->GetLocations();
2407 
2408   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2409   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2410   CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
2411   // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
2412   DCHECK_EQ(expected.AsRegister(), RAX);
2413   CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2414   Location out_loc = locations->Out();
2415   CpuRegister out = out_loc.AsRegister<CpuRegister>();
2416 
2417   if (type == DataType::Type::kReference) {
2418     // The only read barrier implementation supporting the
2419     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2420     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2421 
2422     CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2423     CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2424 
2425     // Mark card for object assuming new value is stored.
2426     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2427     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2428 
2429     // The address of the field within the holding object.
2430     Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2431 
2432     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2433       // Need to make sure the reference stored in the field is a to-space
2434       // one before attempting the CAS or the CAS could fail incorrectly.
2435       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2436           invoke,
2437           out_loc,  // Unused, used only as a "temporary" within the read barrier.
2438           base,
2439           field_addr,
2440           /* needs_null_check */ false,
2441           /* always_update_field */ true,
2442           &temp1,
2443           &temp2);
2444     }
2445 
2446     bool base_equals_value = (base.AsRegister() == value.AsRegister());
2447     Register value_reg = value.AsRegister();
2448     if (kPoisonHeapReferences) {
2449       if (base_equals_value) {
2450         // If `base` and `value` are the same register location, move
2451         // `value_reg` to a temporary register.  This way, poisoning
2452         // `value_reg` won't invalidate `base`.
2453         value_reg = temp1.AsRegister();
2454         __ movl(CpuRegister(value_reg), base);
2455       }
2456 
2457       // Check that the register allocator did not assign the location
2458       // of `expected` (RAX) to `value` nor to `base`, so that heap
2459       // poisoning (when enabled) works as intended below.
2460       // - If `value` were equal to `expected`, both references would
2461       //   be poisoned twice, meaning they would not be poisoned at
2462       //   all, as heap poisoning uses address negation.
2463       // - If `base` were equal to `expected`, poisoning `expected`
2464       //   would invalidate `base`.
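      // (Poisoning is implemented as two's-complement negation of the reference
      // bits, so applying it twice is the identity: -(-ref) == ref. That is why
      // poisoning `value` and `expected` through the same register would leave
      // both of them effectively unpoisoned.)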
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr, CpuRegister(value_reg));

    // LOCK CMPXCHG has full barrier semantics, and we don't need
    // scheduling barriers at this time.
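    // Reminder of the CMPXCHG contract: the value in RAX (`expected`) is compared
    // with the memory operand; on a match the register operand (`value_reg`) is
    // stored and ZF is set, otherwise the current memory value is loaded into RAX
    // and ZF is cleared. The setcc below turns that flag into the boolean result.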

    // Convert ZF into the Boolean result.
    __ setcc(kZero, out);
    __ movzxb(out, out);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        // Ensure `value` is different from `out`, so that unpoisoning
        // the former does not invalidate the latter.
        DCHECK_NE(value_reg, out.AsRegister());
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // Ensure `expected` is different from `out`, so that unpoisoning
      // the former does not invalidate the latter.
      DCHECK_NE(expected.AsRegister(), out.AsRegister());
      __ UnpoisonHeapReference(expected);
    }
  } else {
    if (type == DataType::Type::kInt32) {
      __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
    } else if (type == DataType::Type::kInt64) {
      __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
    } else {
      LOG(FATAL) << "Unexpected CAS type " << type;
    }

    // LOCK CMPXCHG has full barrier semantics, and we don't need
    // scheduling barriers at this time.

    // Convert ZF into the Boolean result.
    __ setcc(kZero, out);
    __ movzxb(out, out);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(DataType::Type::kInt32, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(DataType::Type::kInt64, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCAS(DataType::Type::kReference, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
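  // Worked example (illustrative only): reversing 0x00000001.
  //   bswap:            0x00000001 -> 0x01000000  (bit 0 becomes bit 24)
  //   swap 1-bit pairs: 0x01000000 -> 0x02000000  (bit 24 -> bit 25)
  //   swap 2-bit pairs: 0x02000000 -> 0x08000000  (bit 25 -> bit 27)
  //   swap nibbles:     0x08000000 -> 0x80000000  (bit 27 -> bit 31)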
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

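// Unlike the 32-bit variant, the 64-bit masks (e.g. 0x5555555555555555) cannot be
// encoded as immediates of andq, which only accepts sign-extended 32-bit immediates.
// The mask is therefore materialized in a second temporary register first.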
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

static void CreateBitCountLocations(
    ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen,
                        HInvoke* invoke,
                        bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
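    // For instance, Integer.bitCount(0x00F0F00F) folds to the constant 12 here,
    // and Long.bitCount(-1L) folds to 64; no popcnt instruction is emitted.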
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  if (src.IsRegister()) {
    if (is_long) {
      __ popcntq(out, src.AsRegister<CpuRegister>());
    } else {
      __ popcntl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(allocator_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());  // any will do
}

static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1ULL << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
  if (is_high) {
    // Use architectural support: basically 1 << bsr.
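    // E.g. highestOneBit(0x00123400): BSR finds the index of the most significant
    // set bit (20 here), so the result is 1 << 20 == 0x00100000.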
    if (src.IsRegister()) {
      if (is_long) {
        __ bsrq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ bsrl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // BSR sets ZF if the input was zero.
    NearLabel is_zero, done;
    __ j(kEqual, &is_zero);
    __ movl(out, Immediate(1));  // Clears upper bits too.
    if (is_long) {
      __ shlq(out, tmp);
    } else {
      __ shll(out, tmp);
    }
    __ jmp(&done);
    __ Bind(&is_zero);
    __ xorl(out, out);  // Clears upper bits too.
    __ Bind(&done);
  } else {
    // Copy input into temporary.
    if (src.IsRegister()) {
      if (is_long) {
        __ movq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ movl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // Do the bit twiddling: basically tmp & -tmp;
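    // Worked example: tmp = 0b0110'1000. In two's complement -tmp = ...1001'1000,
    // so tmp & -tmp = 0b0000'1000, i.e. only the lowest set bit survives.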
    if (is_long) {
      __ movq(out, tmp);
      __ negq(tmp);
      __ andq(out, tmp);
    } else {
      __ movl(out, tmp);
      __ negl(tmp);
      __ andl(out, tmp);
    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(allocator_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
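  // For a nonzero input, clz(x) = (width - 1) - bsr(x). Since bsr(x) is in
  // [0, width - 1] and width - 1 is 31 (or 63), i.e. all ones in the low bits,
  // the subtraction never borrows and is equivalent to the XOR below.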
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
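  // For a nonzero input, BSF already yields the index of the lowest set bit,
  // which is exactly the number of trailing zeros, so unlike the CLZ case no
  // post-correction is needed; only the zero case has to be patched below.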
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(RAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(static_cast<int32_t>(address)));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(argument, Immediate(static_cast<int32_t>(address)));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
    // Check bounds of our cache.
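    // The two-sided range check is folded into a single unsigned comparison:
    // after subtracting info.low via LEA, any value below the cache range wraps
    // around to a large unsigned number, so one cmp + jae covers both bounds.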
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
      __ movl(out, Address(out, TIMES_4, data_offset + address));
    } else {
      CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
      __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
      __ movl(out, Address(temp, out, TIMES_4, 0));
    }
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(argument, Immediate(static_cast<int32_t>(address)));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  Address address = Address::Absolute(
      Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true);
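  // The interrupted flag lives in the Thread object, addressed GS-relative on
  // x86-64. The sequence below reads the flag and, only if it was set, clears it
  // and issues a fence so the clear is not reordered with later memory accesses.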
  NearLabel done;
  __ gs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ gs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art