/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86_64 slow path.  Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86_64 for HInvokeVirtual).  So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

#define __ assembler->
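// Note: with "__" defined as above, "__ movl(...)" expands to
// "assembler->movl(...)", so the code generators below read like assembly
// listings.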

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
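  // movd moves the raw bits between a general-purpose and an XMM register;
  // the is64bit flag selects the 64-bit (movq) encoding.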
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
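      // bswapl reverses all four bytes; the arithmetic shift then moves the
      // two bytes of interest back down while sign-extending, as required
      // for a Java short.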
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary.  This will avoid a
  // temporary.
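  // And-ing with a mask of all ones except the sign bit computes abs()
  // without a branch and works for every input, NaNs included.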
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
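  // Branchless abs: with mask = x >> 63 (or x >> 31), i.e. all ones for
  // negative x and all zeroes otherwise, abs(x) == (x + mask) ^ mask.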
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
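  // Equal operands land here. For +/-0.0, OR-ing the bit patterns keeps a
  // sign bit set in either input (min must return -0.0), while AND-ing keeps
  // it only if set in both (max must return +0.0). For other equal values
  // the operation is a no-op.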
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
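  // round_mode is the SSE4.1 rounding-control immediate: 0 rounds to the
  // nearest (even) integer, 1 rounds toward negative infinity (floor) and
  // 2 toward positive infinity (ceil).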
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f);  convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And round toward negative infinity (floor); together with the +0.5 this
  // implements Math.round's round-half-up.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5);  convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And round toward negative infinity (floor); together with the +0.5 this
  // implements Math.round's round-half-up.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
  __ jmp(&done);
  __ Bind(&nan);

  //  output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86_64Assembler* assembler = codegen->GetAssembler();

  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
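  // On x86-64 the gs segment register points at the current Thread, so this
  // calls the runtime entrypoint through the Thread's entrypoint table.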
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx] (out aliases obj, as the output was set to SameAsFirstInput).
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be non-negative.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister input_len,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
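  // Validates that [pos, pos + length) lies within the bounds of input,
  // jumping to the slow path on any failure. When length_is_input_length is
  // set, the caller guarantees length == length(input), so only pos == 0 can
  // succeed.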
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
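  // rep movsw copies RCX 16-bit words from [RSI] to [RDI], advancing all
  // three registers; this is why the temporaries were pinned to RSI, RDI and
  // RCX above.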
1118   __ rep_movsw();
1119 
1120   __ Bind(slow_path->GetExitLabel());
1121 }
1122 
1123 
VisitSystemArrayCopy(HInvoke * invoke)1124 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1125   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1126 }
1127 
1128 // TODO: Implement read barriers in the SystemArrayCopy intrinsic.
1129 // Note that this code path is not used (yet) because we do not
1130 // intrinsify methods that can go into the IntrinsicSlowPathX86_64
1131 // slow path.
VisitSystemArrayCopy(HInvoke * invoke)1132 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1133   X86_64Assembler* assembler = GetAssembler();
1134   LocationSummary* locations = invoke->GetLocations();
1135 
1136   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1137   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1138   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1139   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1140 
1141   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
1142   Location src_pos = locations->InAt(1);
1143   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
1144   Location dest_pos = locations->InAt(3);
1145   Location length = locations->InAt(4);
1146   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
1147   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
1148   CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
1149 
1150   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1151   codegen_->AddSlowPath(slow_path);
1152 
1153   NearLabel conditions_on_positions_validated;
1154   SystemArrayCopyOptimizations optimizations(invoke);
1155 
1156   // If source and destination are the same, we go to slow path if we need to do
1157   // forward copying.
1158   if (src_pos.IsConstant()) {
1159     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1160     if (dest_pos.IsConstant()) {
1161       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1162       if (optimizations.GetDestinationIsSource()) {
1163         // Checked when building locations.
1164         DCHECK_GE(src_pos_constant, dest_pos_constant);
1165       } else if (src_pos_constant < dest_pos_constant) {
1166         __ cmpl(src, dest);
1167         __ j(kEqual, slow_path->GetEntryLabel());
1168       }
1169     } else {
1170       if (!optimizations.GetDestinationIsSource()) {
1171         __ cmpl(src, dest);
1172         __ j(kNotEqual, &conditions_on_positions_validated);
1173       }
1174       __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
1175       __ j(kGreater, slow_path->GetEntryLabel());
1176     }
1177   } else {
1178     if (!optimizations.GetDestinationIsSource()) {
1179       __ cmpl(src, dest);
1180       __ j(kNotEqual, &conditions_on_positions_validated);
1181     }
1182     if (dest_pos.IsConstant()) {
1183       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1184       __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
1185       __ j(kLess, slow_path->GetEntryLabel());
1186     } else {
1187       __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
1188       __ j(kLess, slow_path->GetEntryLabel());
1189     }
1190   }
1191 
1192   __ Bind(&conditions_on_positions_validated);
1193 
1194   if (!optimizations.GetSourceIsNotNull()) {
1195     // Bail out if the source is null.
1196     __ testl(src, src);
1197     __ j(kEqual, slow_path->GetEntryLabel());
1198   }
1199 
1200   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1201     // Bail out if the destination is null.
1202     __ testl(dest, dest);
1203     __ j(kEqual, slow_path->GetEntryLabel());
1204   }
1205 
1206   // If the length is negative, bail out.
1207   // We have already checked in the LocationsBuilder for the constant case.
1208   if (!length.IsConstant() &&
1209       !optimizations.GetCountIsSourceLength() &&
1210       !optimizations.GetCountIsDestinationLength()) {
1211     __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1212     __ j(kLess, slow_path->GetEntryLabel());
1213   }
1214 
1215   // Validity checks: source.
1216   CheckPosition(assembler,
1217                 src_pos,
1218                 src,
1219                 length,
1220                 slow_path,
1221                 temp1,
1222                 temp2,
1223                 optimizations.GetCountIsSourceLength());
1224 
1225   // Validity checks: dest.
1226   CheckPosition(assembler,
1227                 dest_pos,
1228                 dest,
1229                 length,
1230                 slow_path,
1231                 temp1,
1232                 temp2,
1233                 optimizations.GetCountIsDestinationLength());
1234 
1235   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1236     // Check whether all elements of the source array are assignable to the component
1237     // type of the destination array. We do two checks: the classes are the same,
1238     // or the destination is Object[]. If none of these checks succeed, we go to the
1239     // slow path.
1240     __ movl(temp1, Address(dest, class_offset));
1241     __ movl(temp2, Address(src, class_offset));
1242     bool did_unpoison = false;
1243     if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1244         !optimizations.GetSourceIsNonPrimitiveArray()) {
1245       // One or two of the references need to be unpoisoned. Unpoison them
1246       // both to make the identity check valid.
1247       __ MaybeUnpoisonHeapReference(temp1);
1248       __ MaybeUnpoisonHeapReference(temp2);
1249       did_unpoison = true;
1250     }
1251 
1252     if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1253       // Bail out if the destination is not a non primitive array.
1254       // /* HeapReference<Class> */ TMP = temp1->component_type_
1255       __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1256       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1257       __ j(kEqual, slow_path->GetEntryLabel());
1258       __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1259       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1260       __ j(kNotEqual, slow_path->GetEntryLabel());
1261     }
1262 
1263     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1264       // Bail out if the source is not a non-primitive array.
1265       // /* HeapReference<Class> */ TMP = temp2->component_type_
1266       __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1267       __ testl(CpuRegister(TMP), CpuRegister(TMP));
1268       __ j(kEqual, slow_path->GetEntryLabel());
1269       __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1270       __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1271       __ j(kNotEqual, slow_path->GetEntryLabel());
1272     }
1273 
1274     __ cmpl(temp1, temp2);
1275 
1276     if (optimizations.GetDestinationIsTypedObjectArray()) {
1277       NearLabel do_copy;
1278       __ j(kEqual, &do_copy);
1279       if (!did_unpoison) {
1280         __ MaybeUnpoisonHeapReference(temp1);
1281       }
1282       // /* HeapReference<Class> */ temp1 = temp1->component_type_
1283       __ movl(temp1, Address(temp1, component_offset));
1284       __ MaybeUnpoisonHeapReference(temp1);
1285       // /* HeapReference<Class> */ temp1 = temp1->super_class_
1286       __ movl(temp1, Address(temp1, super_offset));
1287       // No need to unpoison the result, we're comparing against null.
1288       __ testl(temp1, temp1);
1289       __ j(kNotEqual, slow_path->GetEntryLabel());
1290       __ Bind(&do_copy);
1291     } else {
1292       __ j(kNotEqual, slow_path->GetEntryLabel());
1293     }
1294   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1295     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1296     // Bail out if the source is not a non-primitive array.
1297     // /* HeapReference<Class> */ temp1 = src->klass_
1298     __ movl(temp1, Address(src, class_offset));
1299     __ MaybeUnpoisonHeapReference(temp1);
1300     // /* HeapReference<Class> */ TMP = temp1->component_type_
1301     __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1302     __ testl(CpuRegister(TMP), CpuRegister(TMP));
1303     __ j(kEqual, slow_path->GetEntryLabel());
1304     __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1305     __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1306     __ j(kNotEqual, slow_path->GetEntryLabel());
1307   }
1308 
1309   // Compute base source address, base destination address, and end source address.
1310 
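  // Note: heap references are stored as 32-bit (compressed) values even on 64-bit
  // targets, which is why the element size used below is sizeof(int32_t).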
1311   uint32_t element_size = sizeof(int32_t);
1312   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
1313   if (src_pos.IsConstant()) {
1314     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1315     __ leal(temp1, Address(src, element_size * constant + offset));
1316   } else {
1317     __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1318   }
1319 
1320   if (dest_pos.IsConstant()) {
1321     int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1322     __ leal(temp2, Address(dest, element_size * constant + offset));
1323   } else {
1324     __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1325   }
1326 
1327   if (length.IsConstant()) {
1328     int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
1329     __ leal(temp3, Address(temp1, element_size * constant));
1330   } else {
1331     __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
1332   }
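  // Illustrative example (hypothetical values): with a data offset of 12, src_pos = 2 and
  // length = 3, temp1 = src + 12 + 2 * 4 = src + 20 and temp3 = temp1 + 3 * 4 = src + 32,
  // i.e. one past the last reference to copy; temp2 is formed the same way from dest.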
1333 
1334   // Iterate over the arrays and do a raw copy of the objects. We don't need to
1335   // poison/unpoison the references, nor to emit a read barrier, as the next uses
1336   // of the destination array will do it.
1337   NearLabel loop, done;
1338   __ cmpl(temp1, temp3);
1339   __ j(kEqual, &done);
1340   __ Bind(&loop);
1341   __ movl(CpuRegister(TMP), Address(temp1, 0));
1342   __ movl(Address(temp2, 0), CpuRegister(TMP));
1343   __ addl(temp1, Immediate(element_size));
1344   __ addl(temp2, Immediate(element_size));
1345   __ cmpl(temp1, temp3);
1346   __ j(kNotEqual, &loop);
1347   __ Bind(&done);
1348 
1349   // We only need one card marking on the destination array.
1350   codegen_->MarkGCCard(temp1,
1351                        temp2,
1352                        dest,
1353                        CpuRegister(kNoRegister),
1354                        /* value_can_be_null */ false);
1355 
1356   __ Bind(slow_path->GetExitLabel());
1357 }
1358 
1359 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1360   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1361                                                             LocationSummary::kCall,
1362                                                             kIntrinsified);
1363   InvokeRuntimeCallingConvention calling_convention;
1364   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1365   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1366   locations->SetOut(Location::RegisterLocation(RAX));
1367 }
1368 
1369 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1370   X86_64Assembler* assembler = GetAssembler();
1371   LocationSummary* locations = invoke->GetLocations();
1372 
1373   // Note that the null check must have been done earlier.
1374   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1375 
1376   CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1377   __ testl(argument, argument);
1378   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1379   codegen_->AddSlowPath(slow_path);
1380   __ j(kEqual, slow_path->GetEntryLabel());
1381 
1382   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
1383                                   /* no_rip */ true));
1384   __ Bind(slow_path->GetExitLabel());
1385 }
1386 
1387 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1388   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1389                                                             LocationSummary::kNoCall,
1390                                                             kIntrinsified);
1391   locations->SetInAt(0, Location::RequiresRegister());
1392   locations->SetInAt(1, Location::RequiresRegister());
1393 
1394   // Request temporary registers; RCX and RDI are needed for the repe_cmpsq instruction.
1395   locations->AddTemp(Location::RegisterLocation(RCX));
1396   locations->AddTemp(Location::RegisterLocation(RDI));
1397 
1398   // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1399   locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1400 }
1401 
1402 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1403   X86_64Assembler* assembler = GetAssembler();
1404   LocationSummary* locations = invoke->GetLocations();
1405 
1406   CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1407   CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1408   CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1409   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1410   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1411 
1412   NearLabel end, return_true, return_false;
1413 
1414   // Get offsets of count, value, and class fields within a string object.
1415   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1416   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1417   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1418 
1419   // Note that the null check must have been done earlier.
1420   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1421 
1422   // Check if input is null, return false if it is.
1423   __ testl(arg, arg);
1424   __ j(kEqual, &return_false);
1425 
1426   // Instanceof check for the argument by comparing class fields.
1427   // All string objects must have the same type since String cannot be subclassed.
1428   // Receiver must be a string object, so its class field is equal to all strings' class fields.
1429   // If the argument is a string object, its class field must be equal to the receiver's class field.
1430   __ movl(rcx, Address(str, class_offset));
1431   __ cmpl(rcx, Address(arg, class_offset));
1432   __ j(kNotEqual, &return_false);
1433 
1434   // Reference equality check, return true if same reference.
1435   __ cmpl(str, arg);
1436   __ j(kEqual, &return_true);
1437 
1438   // Load length of receiver string.
1439   __ movl(rcx, Address(str, count_offset));
1440   // Check if lengths are equal, return false if they're not.
1441   __ cmpl(rcx, Address(arg, count_offset));
1442   __ j(kNotEqual, &return_false);
1443   // Return true if both strings are empty.
1444   __ jrcxz(&return_true);
1445 
1446   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1447   __ leal(rsi, Address(str, value_offset));
1448   __ leal(rdi, Address(arg, value_offset));
1449 
1450   // Divide the string length by 4, rounding up to handle lengths not divisible by 4.
1451   __ addl(rcx, Immediate(3));
1452   __ shrl(rcx, Immediate(2));
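  // E.g. (illustrative) a 5-char string yields (5 + 3) >> 2 = 2 quadwords to compare: one
  // full quadword of chars plus one covering the last char and the zero padding.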
1453 
1454   // Assertions that must hold in order to compare strings 4 characters at a time.
1455   DCHECK_ALIGNED(value_offset, 8);
1456   static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
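  // In other words: objects are 8-byte aligned and heap memory is zero-initialized, so for
  // two strings of equal length the chars (if any) between the end of the data and the next
  // 8-byte boundary are zero in both, making the quadword-sized over-read safe.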
1457 
1458   // Loop to compare strings four characters at a time starting at the beginning of the string.
1459   __ repe_cmpsq();
1460   // If strings are not equal, zero flag will be cleared.
1461   __ j(kNotEqual, &return_false);
1462 
1463   // Return true and exit the function.
1464   // If the loop did not branch to return_false, the strings are equal.
1465   __ Bind(&return_true);
1466   __ movl(rsi, Immediate(1));
1467   __ jmp(&end);
1468 
1469   // Return false and exit the function.
1470   __ Bind(&return_false);
1471   __ xorl(rsi, rsi);
1472   __ Bind(&end);
1473 }
1474 
1475 static void CreateStringIndexOfLocations(HInvoke* invoke,
1476                                          ArenaAllocator* allocator,
1477                                          bool start_at_zero) {
1478   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1479                                                                LocationSummary::kCallOnSlowPath,
1480                                                                kIntrinsified);
1481   // The data needs to be in RDI for scasw, so request that the string be there anyway.
1482   locations->SetInAt(0, Location::RegisterLocation(RDI));
1483   // If we look for a constant char, we'll still have to copy it into RAX anyway, so just
1484   // request that the allocator do that. We can still handle the constant case by checking
1485   // the parameter of the instruction explicitly.
1486   // Note: This works as we don't clobber RAX anywhere.
1487   locations->SetInAt(1, Location::RegisterLocation(RAX));
1488   if (!start_at_zero) {
1489     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1490   }
1491   // As we clobber RDI during execution anyway, also use it as the output.
1492   locations->SetOut(Location::SameAsFirstInput());
1493 
1494   // repne scasw uses RCX as the counter.
1495   locations->AddTemp(Location::RegisterLocation(RCX));
1496   // Need another temporary to be able to compute the result.
1497   locations->AddTemp(Location::RequiresRegister());
1498 }
1499 
1500 static void GenerateStringIndexOf(HInvoke* invoke,
1501                                   X86_64Assembler* assembler,
1502                                   CodeGeneratorX86_64* codegen,
1503                                   ArenaAllocator* allocator,
1504                                   bool start_at_zero) {
1505   LocationSummary* locations = invoke->GetLocations();
1506 
1507   // Note that the null check must have been done earlier.
1508   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1509 
1510   CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1511   CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1512   CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1513   CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1514   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1515 
1516   // Check our assumptions for registers.
1517   DCHECK_EQ(string_obj.AsRegister(), RDI);
1518   DCHECK_EQ(search_value.AsRegister(), RAX);
1519   DCHECK_EQ(counter.AsRegister(), RCX);
1520   DCHECK_EQ(out.AsRegister(), RDI);
1521 
1522   // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know
1523   // statically, or dispatch directly if we have a constant.
1524   SlowPathCode* slow_path = nullptr;
1525   if (invoke->InputAt(1)->IsIntConstant()) {
1526     if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
1527             std::numeric_limits<uint16_t>::max()) {
1528       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1529       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1530       slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1531       codegen->AddSlowPath(slow_path);
1532       __ jmp(slow_path->GetEntryLabel());
1533       __ Bind(slow_path->GetExitLabel());
1534       return;
1535     }
1536   } else {
1537     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1538     slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1539     codegen->AddSlowPath(slow_path);
1540     __ j(kAbove, slow_path->GetEntryLabel());
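    // Note that kAbove is an unsigned condition, so negative values (viewed as unsigned)
    // also compare above 0xFFFF and correctly take the slow path.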
1541   }
1542 
1543   // From here down, we know that we are looking for a char that fits in 16 bits.
1544   // Location of reference to data array within the String object.
1545   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1546   // Location of count within the String object.
1547   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1548 
1549   // Load string length, i.e., the count field of the string.
1550   __ movl(string_length, Address(string_obj, count_offset));
1551 
1552   // Do a length check.
1553   // TODO: Support jecxz.
1554   NearLabel not_found_label;
1555   __ testl(string_length, string_length);
1556   __ j(kEqual, &not_found_label);
1557 
1558   if (start_at_zero) {
1559     // Number of chars to scan is the same as the string length.
1560     __ movl(counter, string_length);
1561 
1562     // Move to the start of the string.
1563     __ addq(string_obj, Immediate(value_offset));
1564   } else {
1565     CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1566 
1567     // Do a start_index check.
1568     __ cmpl(start_index, string_length);
1569     __ j(kGreaterEqual, &not_found_label);
1570 
1571     // Ensure we have a start index >= 0.
1572     __ xorl(counter, counter);
1573     __ cmpl(start_index, Immediate(0));
1574     __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
1575 
1576     // Move to the start of the string: string_obj + value_offset + 2 * counter (the clamped start_index).
1577     __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1578 
1579     // Now update the work counter in RCX: it will be string.length - start_index.
1580     __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
1581     __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1582   }
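  // Illustrative trace: for string_length = 10 and start_index = 3, counter ends up as
  // 10 - 3 = 7 chars to scan, with string_obj pointing at the char at index 3.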
1583 
1584   // Everything is set up for repne scasw:
1585   //   * Comparison address in RDI.
1586   //   * Counter in RCX.
1587   __ repne_scasw();
1588 
1589   // Did we find a match?
1590   __ j(kNotEqual, &not_found_label);
1591 
1592   // Yes, we matched.  Compute the index of the result.
1593   __ subl(string_length, counter);
1594   __ leal(out, Address(string_length, -1));
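  // repne scasw decremented the counter once per char scanned, including the match, so
  // e.g. (illustrative) for length 10 and a match at index 3 the counter is 6 afterwards
  // and out = (10 - 6) - 1 = 3, the expected index.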
1595 
1596   NearLabel done;
1597   __ jmp(&done);
1598 
1599   // Failed to match; return -1.
1600   __ Bind(&not_found_label);
1601   __ movl(out, Immediate(-1));
1602 
1603   // And join up at the end.
1604   __ Bind(&done);
1605   if (slow_path != nullptr) {
1606     __ Bind(slow_path->GetExitLabel());
1607   }
1608 }
1609 
1610 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1611   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1612 }
1613 
1614 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1615   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1616 }
1617 
1618 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1619   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1620 }
1621 
1622 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1623   GenerateStringIndexOf(
1624       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1625 }
1626 
1627 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1628   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1629                                                             LocationSummary::kCall,
1630                                                             kIntrinsified);
1631   InvokeRuntimeCallingConvention calling_convention;
1632   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1633   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1634   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1635   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1636   locations->SetOut(Location::RegisterLocation(RAX));
1637 }
1638 
1639 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1640   X86_64Assembler* assembler = GetAssembler();
1641   LocationSummary* locations = invoke->GetLocations();
1642 
1643   CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1644   __ testl(byte_array, byte_array);
1645   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1646   codegen_->AddSlowPath(slow_path);
1647   __ j(kEqual, slow_path->GetEntryLabel());
1648 
1649   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
1650                                   /* no_rip */ true));
1651   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1652   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1653   __ Bind(slow_path->GetExitLabel());
1654 }
1655 
1656 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1657   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1658                                                             LocationSummary::kCall,
1659                                                             kIntrinsified);
1660   InvokeRuntimeCallingConvention calling_convention;
1661   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1662   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1663   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1664   locations->SetOut(Location::RegisterLocation(RAX));
1665 }
1666 
1667 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1668   X86_64Assembler* assembler = GetAssembler();
1669 
1670   // No need to emit code checking whether `locations->InAt(2)` is a null
1671   // pointer, as callers of the native method
1672   //
1673   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1674   //
1675   // all include a null check on `data` before calling that method.
1676   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
1677                                   /* no_rip */ true));
1678   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1679   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1680 }
1681 
1682 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1683   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1684                                                             LocationSummary::kCall,
1685                                                             kIntrinsified);
1686   InvokeRuntimeCallingConvention calling_convention;
1687   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1688   locations->SetOut(Location::RegisterLocation(RAX));
1689 }
1690 
1691 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1692   X86_64Assembler* assembler = GetAssembler();
1693   LocationSummary* locations = invoke->GetLocations();
1694 
1695   CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1696   __ testl(string_to_copy, string_to_copy);
1697   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1698   codegen_->AddSlowPath(slow_path);
1699   __ j(kEqual, slow_path->GetEntryLabel());
1700 
1701   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
1702                                   /* no_rip */ true));
1703   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1704   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1705   __ Bind(slow_path->GetExitLabel());
1706 }
1707 
1708 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1709   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1710   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1711                                                             LocationSummary::kNoCall,
1712                                                             kIntrinsified);
1713   locations->SetInAt(0, Location::RequiresRegister());
1714   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1715   locations->SetInAt(2, Location::RequiresRegister());
1716   locations->SetInAt(3, Location::RequiresRegister());
1717   locations->SetInAt(4, Location::RequiresRegister());
1718 
1719   // And we need some temporaries. We will use REP MOVSW, which requires fixed registers.
1720   locations->AddTemp(Location::RegisterLocation(RSI));
1721   locations->AddTemp(Location::RegisterLocation(RDI));
1722   locations->AddTemp(Location::RegisterLocation(RCX));
1723 }
1724 
1725 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1726   X86_64Assembler* assembler = GetAssembler();
1727   LocationSummary* locations = invoke->GetLocations();
1728 
1729   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1730   // Location of data in char array buffer.
1731   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1732   // Location of char array data in string.
1733   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1734 
1735   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1736   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1737   Location srcBegin = locations->InAt(1);
1738   int srcBegin_value =
1739     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1740   CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1741   CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1742   CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1743 
1744   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1745   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1746   DCHECK_EQ(char_size, 2u);
1747 
1748   // Compute the address of the destination buffer.
1749   __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1750 
1751   // Compute the address of the source string.
1752   if (srcBegin.IsConstant()) {
1753     // Compute the address of the source string by adding the number of chars from
1754     // the source beginning to the value offset of a string.
1755     __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset));
1756   } else {
1757     __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(),
1758                                       ScaleFactor::TIMES_2, value_offset));
1759   }
1760 
1761   // Compute the number of chars (words) to move.
1762   __ movl(CpuRegister(RCX), srcEnd);
1763   if (srcBegin.IsConstant()) {
1764     if (srcBegin_value != 0) {
1765       __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1766     }
1767   } else {
1768     DCHECK(srcBegin.IsRegister());
1769     __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1770   }
1771 
1772   // Do the move.
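  // REP MOVSW copies RCX 16-bit words from [RSI] to [RDI], advancing both pointers.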
1773   __ rep_movsw();
1774 }
1775 
1776 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1777   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1778   CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
1779   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1780   // to avoid a SIGBUS.
1781   switch (size) {
1782     case Primitive::kPrimByte:
1783       __ movsxb(out, Address(address, 0));
1784       break;
1785     case Primitive::kPrimShort:
1786       __ movsxw(out, Address(address, 0));
1787       break;
1788     case Primitive::kPrimInt:
1789       __ movl(out, Address(address, 0));
1790       break;
1791     case Primitive::kPrimLong:
1792       __ movq(out, Address(address, 0));
1793       break;
1794     default:
1795       LOG(FATAL) << "Type not recognized for peek: " << size;
1796       UNREACHABLE();
1797   }
1798 }
1799 
1800 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1801   CreateIntToIntLocations(arena_, invoke);
1802 }
1803 
1804 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1805   GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1806 }
1807 
1808 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1809   CreateIntToIntLocations(arena_, invoke);
1810 }
1811 
1812 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1813   GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1814 }
1815 
1816 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1817   CreateIntToIntLocations(arena_, invoke);
1818 }
1819 
1820 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1821   GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1822 }
1823 
1824 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1825   CreateIntToIntLocations(arena_, invoke);
1826 }
1827 
1828 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1829   GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1830 }
1831 
1832 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
1833   LocationSummary* locations = new (arena) LocationSummary(invoke,
1834                                                            LocationSummary::kNoCall,
1835                                                            kIntrinsified);
1836   locations->SetInAt(0, Location::RequiresRegister());
1837   locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1838 }
1839 
1840 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1841   CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1842   Location value = locations->InAt(1);
1843   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1844   // to avoid a SIGBUS.
1845   switch (size) {
1846     case Primitive::kPrimByte:
1847       if (value.IsConstant()) {
1848         __ movb(Address(address, 0),
1849                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1850       } else {
1851         __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1852       }
1853       break;
1854     case Primitive::kPrimShort:
1855       if (value.IsConstant()) {
1856         __ movw(Address(address, 0),
1857                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1858       } else {
1859         __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1860       }
1861       break;
1862     case Primitive::kPrimInt:
1863       if (value.IsConstant()) {
1864         __ movl(Address(address, 0),
1865                 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1866       } else {
1867         __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1868       }
1869       break;
1870     case Primitive::kPrimLong:
1871       if (value.IsConstant()) {
1872         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1873         DCHECK(IsInt<32>(v));
1874         int32_t v_32 = v;
1875         __ movq(Address(address, 0), Immediate(v_32));
1876       } else {
1877         __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1878       }
1879       break;
1880     default:
1881       LOG(FATAL) << "Type not recognized for poke: " << size;
1882       UNREACHABLE();
1883   }
1884 }
1885 
1886 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1887   CreateIntIntToVoidLocations(arena_, invoke);
1888 }
1889 
1890 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1891   GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1892 }
1893 
1894 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1895   CreateIntIntToVoidLocations(arena_, invoke);
1896 }
1897 
1898 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1899   GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1900 }
1901 
1902 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1903   CreateIntIntToVoidLocations(arena_, invoke);
1904 }
1905 
1906 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1907   GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1908 }
1909 
1910 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1911   CreateIntIntToVoidLocations(arena_, invoke);
1912 }
1913 
1914 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1915   GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1916 }
1917 
1918 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1919   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1920                                                             LocationSummary::kNoCall,
1921                                                             kIntrinsified);
1922   locations->SetOut(Location::RequiresRegister());
1923 }
1924 
1925 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1926   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
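  // On x86-64 the GS segment base points at the current Thread; PeerOffset is the offset
  // of the java.lang.Thread peer object within it.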
1927   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
1928                                                     /* no_rip */ true));
1929 }
1930 
1931 static void GenUnsafeGet(HInvoke* invoke,
1932                          Primitive::Type type,
1933                          bool is_volatile ATTRIBUTE_UNUSED,
1934                          CodeGeneratorX86_64* codegen) {
1935   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1936   LocationSummary* locations = invoke->GetLocations();
1937   Location base_loc = locations->InAt(1);
1938   CpuRegister base = base_loc.AsRegister<CpuRegister>();
1939   Location offset_loc = locations->InAt(2);
1940   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1941   Location output_loc = locations->Out();
1942   CpuRegister output = output_loc.AsRegister<CpuRegister>();
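  // Note: under the x86-64 memory model, loads are not reordered with other loads or with
  // subsequent stores, so the volatile variants need no extra fence here; this is why
  // is_volatile is unused.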
1943 
1944   switch (type) {
1945     case Primitive::kPrimInt:
1946       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1947       break;
1948 
1949     case Primitive::kPrimNot: {
1950       if (kEmitCompilerReadBarrier) {
1951         if (kUseBakerReadBarrier) {
1952           Location temp = locations->GetTemp(0);
1953           codegen->GenerateArrayLoadWithBakerReadBarrier(
1954               invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
1955         } else {
1956           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1957           codegen->GenerateReadBarrierSlow(
1958               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1959         }
1960       } else {
1961         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1962         __ MaybeUnpoisonHeapReference(output);
1963       }
1964       break;
1965     }
1966 
1967     case Primitive::kPrimLong:
1968       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1969       break;
1970 
1971     default:
1972       LOG(FATAL) << "Unsupported op size " << type;
1973       UNREACHABLE();
1974   }
1975 }
1976 
1977 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1978                                           HInvoke* invoke,
1979                                           Primitive::Type type) {
1980   bool can_call = kEmitCompilerReadBarrier &&
1981       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1982        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1983   LocationSummary* locations = new (arena) LocationSummary(invoke,
1984                                                            can_call ?
1985                                                                LocationSummary::kCallOnSlowPath :
1986                                                                LocationSummary::kNoCall,
1987                                                            kIntrinsified);
1988   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1989   locations->SetInAt(1, Location::RequiresRegister());
1990   locations->SetInAt(2, Location::RequiresRegister());
1991   locations->SetOut(Location::RequiresRegister());
1992   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1993     // We need a temporary register for the read barrier marking slow
1994     // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
1995     locations->AddTemp(Location::RequiresRegister());
1996   }
1997 }
1998 
1999 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
2000   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
2001 }
2002 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2003   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
2004 }
2005 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2006   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
2007 }
2008 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2009   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
2010 }
2011 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2012   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
2013 }
2014 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2015   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
2016 }
2017 
2018 
2019 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2020   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2021 }
2022 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2023   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2024 }
2025 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2026   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2027 }
2028 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2029   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2030 }
2031 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2032   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2033 }
2034 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2035   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2036 }
2037 
2038 
2039 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2040                                                        Primitive::Type type,
2041                                                        HInvoke* invoke) {
2042   LocationSummary* locations = new (arena) LocationSummary(invoke,
2043                                                            LocationSummary::kNoCall,
2044                                                            kIntrinsified);
2045   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2046   locations->SetInAt(1, Location::RequiresRegister());
2047   locations->SetInAt(2, Location::RequiresRegister());
2048   locations->SetInAt(3, Location::RequiresRegister());
2049   if (type == Primitive::kPrimNot) {
2050     // Need temp registers for card-marking.
2051     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2052     locations->AddTemp(Location::RequiresRegister());
2053   }
2054 }
2055 
2056 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2057   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2058 }
2059 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2060   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2061 }
2062 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2063   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2064 }
2065 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2066   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2067 }
2068 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2069   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2070 }
2071 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2072   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2073 }
2074 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2075   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2076 }
2077 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2078   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2079 }
2080 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2081   CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2082 }
2083 
2084 // We don't need to do anything special for ordered writes: they require an AnyStore
2085 // barrier, which the x86-64 memory model already provides.
2086 static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
2087                          CodeGeneratorX86_64* codegen) {
2088   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2089   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2090   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2091   CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2092 
2093   if (type == Primitive::kPrimLong) {
2094     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2095   } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2096     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2097     __ movl(temp, value);
2098     __ PoisonHeapReference(temp);
2099     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2100   } else {
2101     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2102   }
2103 
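  // For a volatile store, x86-64 only requires a StoreLoad barrier after the store;
  // MemoryFence() is expected to provide one (an mfence or an equivalent locked no-op).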
2104   if (is_volatile) {
2105     codegen->MemoryFence();
2106   }
2107 
2108   if (type == Primitive::kPrimNot) {
2109     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2110     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2111                         locations->GetTemp(1).AsRegister<CpuRegister>(),
2112                         base,
2113                         value,
2114                         value_can_be_null);
2115   }
2116 }
2117 
2118 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2119   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2120 }
2121 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2122   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2123 }
2124 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2125   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2126 }
2127 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2128   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2129 }
2130 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2131   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2132 }
2133 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2134   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2135 }
2136 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2137   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2138 }
2139 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2140   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2141 }
2142 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2143   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2144 }
2145 
2146 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2147                                        HInvoke* invoke) {
2148   LocationSummary* locations = new (arena) LocationSummary(invoke,
2149                                                            LocationSummary::kNoCall,
2150                                                            kIntrinsified);
2151   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2152   locations->SetInAt(1, Location::RequiresRegister());
2153   locations->SetInAt(2, Location::RequiresRegister());
2154   // The expected value must be in RAX, as required by the CMPXCHG instruction.
2155   locations->SetInAt(3, Location::RegisterLocation(RAX));
2156   locations->SetInAt(4, Location::RequiresRegister());
2157 
2158   locations->SetOut(Location::RequiresRegister());
2159   if (type == Primitive::kPrimNot) {
2160     // Need temp registers for card-marking.
2161     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2162     locations->AddTemp(Location::RequiresRegister());
2163   }
2164 }
2165 
2166 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2167   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2168 }
2169 
2170 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2171   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2172 }
2173 
2174 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2175   // The UnsafeCASObject intrinsic is missing a read barrier, and
2176   // therefore sometimes does not work as expected (b/25883050).
2177   // Turn it off temporarily as a quick fix, until the read barrier is
2178   // implemented.
2179   //
2180   // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
2181   // this intrinsic.
2182   if (kEmitCompilerReadBarrier) {
2183     return;
2184   }
2185 
2186   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2187 }
2188 
2189 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2190   X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2191   LocationSummary* locations = invoke->GetLocations();
2192 
2193   CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2194   CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2195   CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
2196   // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
2197   DCHECK_EQ(expected.AsRegister(), RAX);
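  // CMPXCHG compares RAX with its memory operand: if they are equal it stores the new
  // value there and sets ZF; otherwise it loads the memory operand into RAX and clears ZF.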
2198   CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2199   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2200 
2201   if (type == Primitive::kPrimNot) {
2202     // Mark the card for the object, assuming the new value is stored.
2203     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2204     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2205                         locations->GetTemp(1).AsRegister<CpuRegister>(),
2206                         base,
2207                         value,
2208                         value_can_be_null);
2209 
2210     bool base_equals_value = (base.AsRegister() == value.AsRegister());
2211     Register value_reg = value.AsRegister();
2212     if (kPoisonHeapReferences) {
2213       if (base_equals_value) {
2214         // If `base` and `value` are the same register location, move
2215         // `value_reg` to a temporary register.  This way, poisoning
2216         // `value_reg` won't invalidate `base`.
2217         value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
2218         __ movl(CpuRegister(value_reg), base);
2219       }
2220 
2221       // Check that the register allocator did not assign the location
2222       // of `expected` (RAX) to `value` nor to `base`, so that heap
2223       // poisoning (when enabled) works as intended below.
2224       // - If `value` were equal to `expected`, both references would
2225       //   be poisoned twice, meaning they would not be poisoned at
2226       //   all, as heap poisoning uses address negation.
2227       // - If `base` were equal to `expected`, poisoning `expected`
2228       //   would invalidate `base`.
2229       DCHECK_NE(value_reg, expected.AsRegister());
2230       DCHECK_NE(base.AsRegister(), expected.AsRegister());
2231 
2232       __ PoisonHeapReference(expected);
2233       __ PoisonHeapReference(CpuRegister(value_reg));
2234     }
2235 
2236     // TODO: Add a read barrier for the reference stored in the object
2237     // before attempting the CAS, similar to the one in the
2238     // art::Unsafe_compareAndSwapObject JNI implementation.
2239     //
2240     // Note that this code is not (yet) used when read barriers are
2241     // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
2242     DCHECK(!kEmitCompilerReadBarrier);
2243     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
2244 
2245     // LOCK CMPXCHG has full barrier semantics, and we don't need
2246     // scheduling barriers at this time.
2247 
2248     // Convert ZF into the boolean result.
2249     __ setcc(kZero, out);
2250     __ movzxb(out, out);
2251 
2252     // If heap poisoning is enabled, we need to unpoison the values
2253     // that were poisoned earlier.
2254     if (kPoisonHeapReferences) {
2255       if (base_equals_value) {
2256         // `value_reg` has been moved to a temporary register, no need
2257         // to unpoison it.
2258       } else {
2259         // Ensure `value` is different from `out`, so that unpoisoning
2260         // the former does not invalidate the latter.
2261         DCHECK_NE(value_reg, out.AsRegister());
2262         __ UnpoisonHeapReference(CpuRegister(value_reg));
2263       }
2264       // Ensure `expected` is different from `out`, so that unpoisoning
2265       // the former does not invalidate the latter.
2266       DCHECK_NE(expected.AsRegister(), out.AsRegister());
2267       __ UnpoisonHeapReference(expected);
2268     }
2269   } else {
2270     if (type == Primitive::kPrimInt) {
2271       __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2272     } else if (type == Primitive::kPrimLong) {
2273       __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
2274     } else {
2275       LOG(FATAL) << "Unexpected CAS type " << type;
2276     }
2277 
2278     // LOCK CMPXCHG has full barrier semantics, and we don't need
2279     // scheduling barriers at this time.
2280 
2281     // Convert ZF into the boolean result.
2282     __ setcc(kZero, out);
2283     __ movzxb(out, out);
2284   }
2285 }
2286 
2287 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2288   GenCAS(Primitive::kPrimInt, invoke, codegen_);
2289 }
2290 
2291 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2292   GenCAS(Primitive::kPrimLong, invoke, codegen_);
2293 }
2294 
2295 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2296   GenCAS(Primitive::kPrimNot, invoke, codegen_);
2297 }
2298 
2299 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2300   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2301                                                            LocationSummary::kNoCall,
2302                                                            kIntrinsified);
2303   locations->SetInAt(0, Location::RequiresRegister());
2304   locations->SetOut(Location::SameAsFirstInput());
2305   locations->AddTemp(Location::RequiresRegister());
2306 }
2307 
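// In effect, SwapBits computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
// e.g. with shift = 1 and mask = 0x55555555 it swaps every adjacent pair of bits.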
2308 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2309                      X86_64Assembler* assembler) {
2310   Immediate imm_shift(shift);
2311   Immediate imm_mask(mask);
2312   __ movl(temp, reg);
2313   __ shrl(reg, imm_shift);
2314   __ andl(temp, imm_mask);
2315   __ andl(reg, imm_mask);
2316   __ shll(temp, imm_shift);
2317   __ orl(reg, temp);
2318 }
2319 
2320 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2321   X86_64Assembler* assembler = GetAssembler();
2322   LocationSummary* locations = invoke->GetLocations();
2323 
2324   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2325   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2326 
2327   /*
2328    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2329    * swapping bits to reverse the bits in a number x. Using bswap saves instructions
2330    * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2331    * x = bswap x
2332    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2333    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2334    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2335    */
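  // Illustrative trace: x = 0x00000001 -> bswapl -> 0x01000000; the three rounds then
  // reverse the bits within each byte, yielding 0x80000000.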
2336   __ bswapl(reg);
2337   SwapBits(reg, temp, 1, 0x55555555, assembler);
2338   SwapBits(reg, temp, 2, 0x33333333, assembler);
2339   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2340 }
2341 
2342 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2343   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2344                                                            LocationSummary::kNoCall,
2345                                                            kIntrinsified);
2346   locations->SetInAt(0, Location::RequiresRegister());
2347   locations->SetOut(Location::SameAsFirstInput());
2348   locations->AddTemp(Location::RequiresRegister());
2349   locations->AddTemp(Location::RequiresRegister());
2350 }
2351 
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2352 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2353                        int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2354   Immediate imm_shift(shift);
2355   __ movq(temp_mask, Immediate(mask));
2356   __ movq(temp, reg);
2357   __ shrq(reg, imm_shift);
2358   __ andq(temp, temp_mask);
2359   __ andq(reg, temp_mask);
2360   __ shlq(temp, imm_shift);
2361   __ orq(reg, temp);
2362 }
2363 
VisitLongReverse(HInvoke * invoke)2364 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2365   X86_64Assembler* assembler = GetAssembler();
2366   LocationSummary* locations = invoke->GetLocations();
2367 
2368   CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2369   CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2370   CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2371 
2372   /*
2373    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2374    * swapping bits to reverse bits in a long number x. Using bswap to save instructions
2375    * compared to generic luni implementation which has 5 rounds of swapping bits.
2376    * x = bswap x
2377    * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2378    * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2379    * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2380    */
2381   __ bswapq(reg);
2382   SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2383   SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2384   SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2385 }
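
// The 64-bit variant needs the second temp (temp_mask) because x86-64 AND only
// encodes 32-bit sign-extended immediates, so the wide masks must first be
// materialized with movq. A plain-C++ reference sketch (hypothetical helper,
// not generated code):
//
//   uint64_t ReverseBits64(uint64_t x) {
//     x = __builtin_bswap64(x);
//     x = ((x & 0x5555555555555555ULL) << 1) | ((x >> 1) & 0x5555555555555555ULL);
//     x = ((x & 0x3333333333333333ULL) << 2) | ((x >> 2) & 0x3333333333333333ULL);
//     x = ((x & 0x0F0F0F0F0F0F0F0FULL) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
//     return x;
//   }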

static void CreateBitCountLocations(
    ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen,
                        HInvoke* invoke,
                        bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  if (src.IsRegister()) {
    if (is_long) {
      __ popcntq(out, src.AsRegister<CpuRegister>());
    } else {
      __ popcntl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }
}
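
// When HasPopCnt() is false the intrinsic falls back to a call, which is
// presumably implemented with the classic parallel-sum bit count. A minimal
// sketch of that technique (hypothetical helper, shown only to document what
// a single POPCNT instruction computes):
//
//   uint32_t BitCount32(uint32_t x) {
//     x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
//     x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
//     x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
//     return (x * 0x01010101u) >> 24;                    // add the four bytes
//   }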

void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(is_high ? Location::RegisterLocation(RCX)  // needs CL
                             : Location::RequiresRegister());  // any will do
}

static void GenOneBit(X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen,
                      HInvoke* invoke,
                      bool is_high, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      __ xorl(out, out);  // Clears upper bits too.
      return;
    }
    // Nonzero value.
    if (is_high) {
      value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
                      : 31 - CLZ(static_cast<uint32_t>(value));
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value))
                      : CTZ(static_cast<uint32_t>(value));
    }
    if (is_long) {
      codegen->Load64BitValue(out, 1L << value);
    } else {
      codegen->Load32BitValue(out, 1 << value);
    }
    return;
  }

  // Handle the non-constant cases.
  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
  if (is_high) {
    // Use architectural support: basically 1 << bsr.
    if (src.IsRegister()) {
      if (is_long) {
        __ bsrq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ bsrl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // BSR sets ZF if the input was zero.
    NearLabel is_zero, done;
    __ j(kEqual, &is_zero);
    __ movl(out, Immediate(1));  // Clears upper bits too.
    if (is_long) {
      __ shlq(out, tmp);
    } else {
      __ shll(out, tmp);
    }
    __ jmp(&done);
    __ Bind(&is_zero);
    __ xorl(out, out);  // Clears upper bits too.
    __ Bind(&done);
  } else {
    // Copy input into temporary.
    if (src.IsRegister()) {
      if (is_long) {
        __ movq(tmp, src.AsRegister<CpuRegister>());
      } else {
        __ movl(tmp, src.AsRegister<CpuRegister>());
      }
    } else if (is_long) {
      DCHECK(src.IsDoubleStackSlot());
      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    } else {
      DCHECK(src.IsStackSlot());
      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
    }
    // Do the bit twiddling: basically out = tmp & -tmp.
    if (is_long) {
      __ movq(out, tmp);
      __ negq(tmp);
      __ andq(out, tmp);
    } else {
      __ movl(out, tmp);
      __ negl(tmp);
      __ andl(out, tmp);
    }
  }
}
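
// Reference semantics for the two non-constant cases above, as a plain-C++
// sketch (hypothetical helpers mirroring Integer.highestOneBit and
// Integer.lowestOneBit):
//
//   uint32_t HighestOneBit32(uint32_t x) {
//     // 1 << bsr(x); BSR is undefined for zero input, hence the special case.
//     return x == 0 ? 0 : 1u << (31 - __builtin_clz(x));
//   }
//
//   uint32_t LowestOneBit32(uint32_t x) {
//     return x & (0u - x);  // x & -x isolates the lowest set bit
//   }
//
// The RCX temp in CreateOneBitLocations exists because the variable-count
// shift used on the is_high path takes its shift amount in CL.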

void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ true);
}

void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateOneBitLocations(arena_, invoke, /* is_high */ false);
}

void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
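
// The BSR-to-CLZ correction works because, for a nonzero n-bit input, BSR
// yields the index b of the highest set bit, with b in [0, n-1], and
// clz = (n - 1) - b, which equals b ^ (n - 1) for that range. A plain-C++
// sketch of the same computation (hypothetical helper):
//
//   int32_t NumberOfLeadingZeros32(uint32_t x) {
//     if (x == 0) return 32;                 // the is_zero path above
//     int32_t bsr = 31 - __builtin_clz(x);   // index of highest set bit
//     return bsr ^ 31;                       // same as 31 - bsr here
//   }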

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
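
// Unlike BSR, BSF already produces the CTZ result for nonzero inputs, so only
// the zero case needs patching, and no xor correction or second label is
// required. A plain-C++ sketch (hypothetical helper):
//
//   int32_t NumberOfTrailingZeros32(uint32_t x) {
//     return x == 0 ? 32 : __builtin_ctz(x);  // BSF gives ctz directly
//   }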

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

// 1.8 (Java 8) intrinsics.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art