1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86.h"
18 
19 #include <limits>
20 
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "intrinsics.h"
27 #include "intrinsics_utils.h"
28 #include "lock_word.h"
29 #include "mirror/array-inl.h"
30 #include "mirror/object_array-inl.h"
31 #include "mirror/reference.h"
32 #include "mirror/string.h"
33 #include "scoped_thread_state_change-inl.h"
34 #include "thread-inl.h"
35 #include "utils/x86/assembler_x86.h"
36 #include "utils/x86/constants_x86.h"
37 
38 namespace art {
39 
40 namespace x86 {
41 
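// The constants below are the canonical quiet-NaN bit patterns that Math.min/
// Math.max produce when either input is NaN: 0x7FF8000000000000 for double and
// 0x7FC00000 for float. The separate high/low words exist so the double NaN can
// be materialized with two 32-bit pushes when no constant area is available.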
42 static constexpr int kDoubleNaNHigh = 0x7FF80000;
43 static constexpr int kDoubleNaNLow = 0x00000000;
44 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
45 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
46 
47 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
48   : arena_(codegen->GetGraph()->GetArena()),
49     codegen_(codegen) {
50 }
51 
52 
53 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
54   return down_cast<X86Assembler*>(codegen_->GetAssembler());
55 }
56 
57 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
58   return codegen_->GetGraph()->GetArena();
59 }
60 
61 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
62   Dispatch(invoke);
63   LocationSummary* res = invoke->GetLocations();
64   if (res == nullptr) {
65     return false;
66   }
67   return res->Intrinsified();
68 }
69 
70 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
71   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
72   IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
73 }
74 
75 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
76 
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
79 
80 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
81 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
82  public:
83   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
84       : SlowPathCode(instruction) {
85     DCHECK(kEmitCompilerReadBarrier);
86     DCHECK(kUseBakerReadBarrier);
87   }
88 
89   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
90     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
91     LocationSummary* locations = instruction_->GetLocations();
92     DCHECK(locations->CanCall());
93     DCHECK(instruction_->IsInvokeStaticOrDirect())
94         << "Unexpected instruction in read barrier arraycopy slow path: "
95         << instruction_->DebugName();
96     DCHECK(instruction_->GetLocations()->Intrinsified());
97     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
98 
99     int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
100     uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
101 
102     Register src = locations->InAt(0).AsRegister<Register>();
103     Location src_pos = locations->InAt(1);
104     Register dest = locations->InAt(2).AsRegister<Register>();
105     Location dest_pos = locations->InAt(3);
106     Location length = locations->InAt(4);
107     Location temp1_loc = locations->GetTemp(0);
108     Register temp1 = temp1_loc.AsRegister<Register>();
109     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
110     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
111 
112     __ Bind(GetEntryLabel());
113     // In this code path, registers `temp1`, `temp2`, and `temp3`
114     // (resp.) are not used for the base source address, the base
115     // destination address, and the end source address (resp.), as in
116     // other SystemArrayCopy intrinsic code paths.  Instead they are
117     // (resp.) used for:
118     // - the loop index (`i`);
119     // - the source index (`src_index`) and the loaded (source)
120     //   reference (`value`); and
121     // - the destination index (`dest_index`).
122 
123     // i = 0
124     __ xorl(temp1, temp1);
125     NearLabel loop;
126     __ Bind(&loop);
127     // value = src_array[i + src_pos]
128     if (src_pos.IsConstant()) {
129       int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
130       int32_t adjusted_offset = offset + constant * element_size;
131       __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
132     } else {
133       __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
134       __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
135     }
136     __ MaybeUnpoisonHeapReference(temp2);
137     // TODO: Inline the mark bit check before calling the runtime?
138     // value = ReadBarrier::Mark(value)
139     // No need to save live registers; it's taken care of by the
140     // entrypoint. Also, there is no need to update the stack mask,
141     // as this runtime call will not trigger a garbage collection.
142     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
143     // explanations.)
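    // The runtime provides one ReadBarrierMarkRegXX entrypoint per CPU
    // register; the offset computed below selects the variant matching the
    // register that holds the reference (temp2), which also receives the
    // marked result.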
144     DCHECK_NE(temp2, ESP);
145     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
146     int32_t entry_point_offset =
147         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
148     // This runtime call does not require a stack map.
149     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
150     __ MaybePoisonHeapReference(temp2);
151     // dest_array[i + dest_pos] = value
152     if (dest_pos.IsConstant()) {
153       int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
154       int32_t adjusted_offset = offset + constant * element_size;
155       __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
156     } else {
157       __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
158       __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
159     }
160     // ++i
161     __ addl(temp1, Immediate(1));
162     // if (i != length) goto loop
163     x86_codegen->GenerateIntCompare(temp1_loc, length);
164     __ j(kNotEqual, &loop);
165     __ jmp(GetExitLabel());
166   }
167 
168   const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
169 
170  private:
171   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
172 };
173 
174 #undef __
175 
176 #define __ assembler->
177 
178 static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
179   LocationSummary* locations = new (arena) LocationSummary(invoke,
180                                                            LocationSummary::kNoCall,
181                                                            kIntrinsified);
182   locations->SetInAt(0, Location::RequiresFpuRegister());
183   locations->SetOut(Location::RequiresRegister());
184   if (is64bit) {
185     locations->AddTemp(Location::RequiresFpuRegister());
186   }
187 }
188 
189 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
190   LocationSummary* locations = new (arena) LocationSummary(invoke,
191                                                            LocationSummary::kNoCall,
192                                                            kIntrinsified);
193   locations->SetInAt(0, Location::RequiresRegister());
194   locations->SetOut(Location::RequiresFpuRegister());
195   if (is64bit) {
196     locations->AddTemp(Location::RequiresFpuRegister());
197     locations->AddTemp(Location::RequiresFpuRegister());
198   }
199 }
200 
201 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
202   Location input = locations->InAt(0);
203   Location output = locations->Out();
204   if (is64bit) {
205     // Need to use the temporary.
206     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
207     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
208     __ movd(output.AsRegisterPairLow<Register>(), temp);
209     __ psrlq(temp, Immediate(32));
210     __ movd(output.AsRegisterPairHigh<Register>(), temp);
211   } else {
212     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
213   }
214 }
215 
216 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
217   Location input = locations->InAt(0);
218   Location output = locations->Out();
219   if (is64bit) {
220     // Need to use the temporary.
221     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
222     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
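    // Assemble the 64-bit value from the register pair: after the two movd
    // instructions, punpckldq interleaves the low dwords so temp1 holds the
    // low word in bits 0-31 and the high word in bits 32-63.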
223     __ movd(temp1, input.AsRegisterPairLow<Register>());
224     __ movd(temp2, input.AsRegisterPairHigh<Register>());
225     __ punpckldq(temp1, temp2);
226     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
227   } else {
228     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
229   }
230 }
231 
232 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
233   CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
234 }
235 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
236   CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
237 }
238 
239 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
240   MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
241 }
242 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
243   MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
244 }
245 
246 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
247   CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
248 }
249 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
250   CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
251 }
252 
253 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
254   MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
255 }
256 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
257   MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
258 }
259 
260 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
261   LocationSummary* locations = new (arena) LocationSummary(invoke,
262                                                            LocationSummary::kNoCall,
263                                                            kIntrinsified);
264   locations->SetInAt(0, Location::RequiresRegister());
265   locations->SetOut(Location::SameAsFirstInput());
266 }
267 
268 static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
269   LocationSummary* locations = new (arena) LocationSummary(invoke,
270                                                            LocationSummary::kNoCall,
271                                                            kIntrinsified);
272   locations->SetInAt(0, Location::RequiresRegister());
273   locations->SetOut(Location::RequiresRegister());
274 }
275 
276 static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
277   LocationSummary* locations = new (arena) LocationSummary(invoke,
278                                                            LocationSummary::kNoCall,
279                                                            kIntrinsified);
280   locations->SetInAt(0, Location::RequiresRegister());
281   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
282 }
283 
284 static void GenReverseBytes(LocationSummary* locations,
285                             Primitive::Type size,
286                             X86Assembler* assembler) {
287   Register out = locations->Out().AsRegister<Register>();
288 
289   switch (size) {
290     case Primitive::kPrimShort:
291       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
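      // bswapl reverses all four bytes, leaving the swapped 16-bit value in
      // the upper half of the register; the arithmetic shift brings it back
      // down and sign-extends it to match the short return type.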
292       __ bswapl(out);
293       __ sarl(out, Immediate(16));
294       break;
295     case Primitive::kPrimInt:
296       __ bswapl(out);
297       break;
298     default:
299       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
300       UNREACHABLE();
301   }
302 }
303 
304 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
305   CreateIntToIntLocations(arena_, invoke);
306 }
307 
308 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
309   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
310 }
311 
312 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
313   CreateLongToLongLocations(arena_, invoke);
314 }
315 
316 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
317   LocationSummary* locations = invoke->GetLocations();
318   Location input = locations->InAt(0);
319   Register input_lo = input.AsRegisterPairLow<Register>();
320   Register input_hi = input.AsRegisterPairHigh<Register>();
321   Location output = locations->Out();
322   Register output_lo = output.AsRegisterPairLow<Register>();
323   Register output_hi = output.AsRegisterPairHigh<Register>();
324 
325   X86Assembler* assembler = GetAssembler();
326   // Assign the inputs to the outputs, mixing low/high.
327   __ movl(output_lo, input_hi);
328   __ movl(output_hi, input_lo);
329   __ bswapl(output_lo);
330   __ bswapl(output_hi);
331 }
332 
333 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
334   CreateIntToIntLocations(arena_, invoke);
335 }
336 
337 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
338   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
339 }
340 
341 
342 // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
343 //       need is 64b.
344 
345 static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
346   // TODO: Enable memory operations when the assembler supports them.
347   LocationSummary* locations = new (arena) LocationSummary(invoke,
348                                                            LocationSummary::kNoCall,
349                                                            kIntrinsified);
350   locations->SetInAt(0, Location::RequiresFpuRegister());
351   locations->SetOut(Location::SameAsFirstInput());
352   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
353   DCHECK(static_or_direct != nullptr);
354   if (static_or_direct->HasSpecialInput() &&
355       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
356     // We need addressability for the constant area.
357     locations->SetInAt(1, Location::RequiresRegister());
358     // We need a temporary to hold the constant.
359     locations->AddTemp(Location::RequiresFpuRegister());
360   }
361 }
362 
363 static void MathAbsFP(HInvoke* invoke,
364                       bool is64bit,
365                       X86Assembler* assembler,
366                       CodeGeneratorX86* codegen) {
367   LocationSummary* locations = invoke->GetLocations();
368   Location output = locations->Out();
369 
370   DCHECK(output.IsFpuRegister());
371   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
372     HX86ComputeBaseMethodAddress* method_address =
373         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
374     DCHECK(locations->InAt(1).IsRegister());
375     // We also have a constant area pointer.
376     Register constant_area = locations->InAt(1).AsRegister<Register>();
377     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
378     if (is64bit) {
379       __ movsd(temp, codegen->LiteralInt64Address(
380           INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
381       __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
382     } else {
383       __ movss(temp, codegen->LiteralInt32Address(
384           INT32_C(0x7FFFFFFF), method_address, constant_area));
385       __ andps(output.AsFpuRegister<XmmRegister>(), temp);
386     }
387   } else {
388     // Create the right constant on an aligned stack.
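    // andps/andpd have no immediate form, so the sign-clearing mask
    // (0x7FFFFFFF for float, 0x7FFFFFFFFFFFFFFF for double) is first built
    // in memory with pushes and then applied from (ESP, 0).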
389     if (is64bit) {
390       __ subl(ESP, Immediate(8));
391       __ pushl(Immediate(0x7FFFFFFF));
392       __ pushl(Immediate(0xFFFFFFFF));
393       __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
394     } else {
395       __ subl(ESP, Immediate(12));
396       __ pushl(Immediate(0x7FFFFFFF));
397       __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
398     }
399     __ addl(ESP, Immediate(16));
400   }
401 }
402 
403 void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
404   CreateFloatToFloat(arena_, invoke);
405 }
406 
407 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
408   MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
409 }
410 
411 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
412   CreateFloatToFloat(arena_, invoke);
413 }
414 
415 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
416   MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
417 }
418 
419 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
420   LocationSummary* locations = new (arena) LocationSummary(invoke,
421                                                            LocationSummary::kNoCall,
422                                                            kIntrinsified);
423   locations->SetInAt(0, Location::RegisterLocation(EAX));
424   locations->SetOut(Location::SameAsFirstInput());
425   locations->AddTemp(Location::RegisterLocation(EDX));
426 }
427 
428 static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
429   Location output = locations->Out();
430   Register out = output.AsRegister<Register>();
431   DCHECK_EQ(out, EAX);
432   Register temp = locations->GetTemp(0).AsRegister<Register>();
433   DCHECK_EQ(temp, EDX);
434 
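  // Branchless abs: with sign = x >> 31 (all ones when x is negative, zero
  // otherwise), abs(x) = (x ^ sign) - sign. For x = -5: sign = -1,
  // (-5 ^ -1) = 4 and 4 - (-1) = 5. As with Math.abs, Integer.MIN_VALUE maps to itself.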
435   // Sign extend EAX into EDX.
436   __ cdq();
437 
438   // XOR EAX with sign.
439   __ xorl(EAX, EDX);
440 
441   // Subtract out sign to correct.
442   __ subl(EAX, EDX);
443 
444   // The result is in EAX.
445 }
446 
447 static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
448   LocationSummary* locations = new (arena) LocationSummary(invoke,
449                                                            LocationSummary::kNoCall,
450                                                            kIntrinsified);
451   locations->SetInAt(0, Location::RequiresRegister());
452   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
453   locations->AddTemp(Location::RequiresRegister());
454 }
455 
456 static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
457   Location input = locations->InAt(0);
458   Register input_lo = input.AsRegisterPairLow<Register>();
459   Register input_hi = input.AsRegisterPairHigh<Register>();
460   Location output = locations->Out();
461   Register output_lo = output.AsRegisterPairLow<Register>();
462   Register output_hi = output.AsRegisterPairHigh<Register>();
463   Register temp = locations->GetTemp(0).AsRegister<Register>();
464 
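  // Same (x ^ sign) - sign trick as the 32-bit case, applied to a register
  // pair; sbbl propagates the borrow from the low word into the high word.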
465   // Compute the sign into the temporary.
466   __ movl(temp, input_hi);
467   __ sarl(temp, Immediate(31));
468 
469   // Store the sign into the output.
470   __ movl(output_lo, temp);
471   __ movl(output_hi, temp);
472 
473   // XOR the input to the output.
474   __ xorl(output_lo, input_lo);
475   __ xorl(output_hi, input_hi);
476 
477   // Subtract the sign.
478   __ subl(output_lo, temp);
479   __ sbbl(output_hi, temp);
480 }
481 
482 void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
483   CreateAbsIntLocation(arena_, invoke);
484 }
485 
486 void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
487   GenAbsInteger(invoke->GetLocations(), GetAssembler());
488 }
489 
490 void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
491   CreateAbsLongLocation(arena_, invoke);
492 }
493 
494 void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
495   GenAbsLong(invoke->GetLocations(), GetAssembler());
496 }
497 
498 static void GenMinMaxFP(HInvoke* invoke,
499                         bool is_min,
500                         bool is_double,
501                         X86Assembler* assembler,
502                         CodeGeneratorX86* codegen) {
503   LocationSummary* locations = invoke->GetLocations();
504   Location op1_loc = locations->InAt(0);
505   Location op2_loc = locations->InAt(1);
506   Location out_loc = locations->Out();
507   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
508 
509   // Shortcut for same input locations.
510   if (op1_loc.Equals(op2_loc)) {
511     DCHECK(out_loc.Equals(op1_loc));
512     return;
513   }
514 
515   //  (out := op1)
516   //  out <=? op2
517   //  if Nan jmp Nan_label
518   //  if out is min jmp done
519   //  if op2 is min jmp op2_label
520   //  handle -0/+0
521   //  jmp done
522   // Nan_label:
523   //  out := NaN
524   // op2_label:
525   //  out := op2
526   // done:
527   //
528   // This removes one jmp, but needs to copy one input (op1) to out.
529   //
530   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
531 
532   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
533 
534   NearLabel nan, done, op2_label;
535   if (is_double) {
536     __ ucomisd(out, op2);
537   } else {
538     __ ucomiss(out, op2);
539   }
540 
541   __ j(Condition::kParityEven, &nan);
542 
543   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
544   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
545 
546   // Handle 0.0/-0.0.
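  // At this point the operands compared equal, i.e. they are +0.0/-0.0 (or
  // bitwise identical). OR-ing the bit patterns makes min return -0.0 when
  // either input is -0.0; AND-ing makes max return +0.0 unless both are -0.0.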
547   if (is_min) {
548     if (is_double) {
549       __ orpd(out, op2);
550     } else {
551       __ orps(out, op2);
552     }
553   } else {
554     if (is_double) {
555       __ andpd(out, op2);
556     } else {
557       __ andps(out, op2);
558     }
559   }
560   __ jmp(&done);
561 
562   // NaN handling.
563   __ Bind(&nan);
564   // Do we have a constant area pointer?
565   if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
566     HX86ComputeBaseMethodAddress* method_address =
567         invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
568     DCHECK(locations->InAt(2).IsRegister());
569     Register constant_area = locations->InAt(2).AsRegister<Register>();
570     if (is_double) {
571       __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
572     } else {
573       __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
574     }
575   } else {
576     if (is_double) {
577       __ pushl(Immediate(kDoubleNaNHigh));
578       __ pushl(Immediate(kDoubleNaNLow));
579       __ movsd(out, Address(ESP, 0));
580       __ addl(ESP, Immediate(8));
581     } else {
582       __ pushl(Immediate(kFloatNaN));
583       __ movss(out, Address(ESP, 0));
584       __ addl(ESP, Immediate(4));
585     }
586   }
587   __ jmp(&done);
588 
589   // out := op2;
590   __ Bind(&op2_label);
591   if (is_double) {
592     __ movsd(out, op2);
593   } else {
594     __ movss(out, op2);
595   }
596 
597   // Done.
598   __ Bind(&done);
599 }
600 
601 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
602   LocationSummary* locations = new (arena) LocationSummary(invoke,
603                                                            LocationSummary::kNoCall,
604                                                            kIntrinsified);
605   locations->SetInAt(0, Location::RequiresFpuRegister());
606   locations->SetInAt(1, Location::RequiresFpuRegister());
607   // The following is sub-optimal, but all we can do for now. It would be fine to also accept
608   // the second input to be the output (we can simply swap inputs).
609   locations->SetOut(Location::SameAsFirstInput());
610   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
611   DCHECK(static_or_direct != nullptr);
612   if (static_or_direct->HasSpecialInput() &&
613       invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
614     locations->SetInAt(2, Location::RequiresRegister());
615   }
616 }
617 
618 void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
619   CreateFPFPToFPLocations(arena_, invoke);
620 }
621 
622 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
623   GenMinMaxFP(invoke,
624               /* is_min */ true,
625               /* is_double */ true,
626               GetAssembler(),
627               codegen_);
628 }
629 
630 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
631   CreateFPFPToFPLocations(arena_, invoke);
632 }
633 
634 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
635   GenMinMaxFP(invoke,
636               /* is_min */ true,
637               /* is_double */ false,
638               GetAssembler(),
639               codegen_);
640 }
641 
642 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
643   CreateFPFPToFPLocations(arena_, invoke);
644 }
645 
646 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
647   GenMinMaxFP(invoke,
648               /* is_min */ false,
649               /* is_double */ true,
650               GetAssembler(),
651               codegen_);
652 }
653 
654 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
655   CreateFPFPToFPLocations(arena_, invoke);
656 }
657 
658 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
659   GenMinMaxFP(invoke,
660               /* is_min */ false,
661               /* is_double */ false,
662               GetAssembler(),
663               codegen_);
664 }
665 
666 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
667                       X86Assembler* assembler) {
668   Location op1_loc = locations->InAt(0);
669   Location op2_loc = locations->InAt(1);
670 
671   // Shortcut for same input locations.
672   if (op1_loc.Equals(op2_loc)) {
673     // Can return immediately, as op1_loc == out_loc.
674     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
675     //       a copy here.
676     DCHECK(locations->Out().Equals(op1_loc));
677     return;
678   }
679 
680   if (is_long) {
681     // Need to perform a subtract to get the sign right.
682     // op1 is already in the same location as the output.
683     Location output = locations->Out();
684     Register output_lo = output.AsRegisterPairLow<Register>();
685     Register output_hi = output.AsRegisterPairHigh<Register>();
686 
687     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
688     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
689 
690     // Spare register to compute the subtraction to set condition code.
691     Register temp = locations->GetTemp(0).AsRegister<Register>();
692 
693     // Subtract off op2_low.
694     __ movl(temp, output_lo);
695     __ subl(temp, op2_lo);
696 
697     // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
698     __ movl(temp, output_hi);
699     __ sbbl(temp, op2_hi);
700 
701     // Now the condition code is correct.
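    // For min, op2 replaces the output when out >= op2 (kGreaterEqual); for
    // max, when out < op2 (kLess). cmov does not alter the flags, so both
    // halves of the pair can be moved under the same condition.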
702     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
703     __ cmovl(cond, output_lo, op2_lo);
704     __ cmovl(cond, output_hi, op2_hi);
705   } else {
706     Register out = locations->Out().AsRegister<Register>();
707     Register op2 = op2_loc.AsRegister<Register>();
708 
709     //  (out := op1)
710     //  out <=? op2
711     //  if out is min jmp done
712     //  out := op2
713     // done:
714 
715     __ cmpl(out, op2);
716     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
717     __ cmovl(cond, out, op2);
718   }
719 }
720 
721 static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
722   LocationSummary* locations = new (arena) LocationSummary(invoke,
723                                                            LocationSummary::kNoCall,
724                                                            kIntrinsified);
725   locations->SetInAt(0, Location::RequiresRegister());
726   locations->SetInAt(1, Location::RequiresRegister());
727   locations->SetOut(Location::SameAsFirstInput());
728 }
729 
730 static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
731   LocationSummary* locations = new (arena) LocationSummary(invoke,
732                                                            LocationSummary::kNoCall,
733                                                            kIntrinsified);
734   locations->SetInAt(0, Location::RequiresRegister());
735   locations->SetInAt(1, Location::RequiresRegister());
736   locations->SetOut(Location::SameAsFirstInput());
737   // Register to use to perform a long subtract to set cc.
738   locations->AddTemp(Location::RequiresRegister());
739 }
740 
741 void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
742   CreateIntIntToIntLocations(arena_, invoke);
743 }
744 
745 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
746   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
747 }
748 
749 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
750   CreateLongLongToLongLocations(arena_, invoke);
751 }
752 
753 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
754   GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
755 }
756 
757 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
758   CreateIntIntToIntLocations(arena_, invoke);
759 }
760 
761 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
762   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
763 }
764 
765 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
766   CreateLongLongToLongLocations(arena_, invoke);
767 }
768 
769 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
770   GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
771 }
772 
773 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
774   LocationSummary* locations = new (arena) LocationSummary(invoke,
775                                                            LocationSummary::kNoCall,
776                                                            kIntrinsified);
777   locations->SetInAt(0, Location::RequiresFpuRegister());
778   locations->SetOut(Location::RequiresFpuRegister());
779 }
780 
781 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
782   CreateFPToFPLocations(arena_, invoke);
783 }
784 
785 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
786   LocationSummary* locations = invoke->GetLocations();
787   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
788   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
789 
790   GetAssembler()->sqrtsd(out, in);
791 }
792 
793 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
794   MoveArguments(invoke, codegen);
795 
796   DCHECK(invoke->IsInvokeStaticOrDirect());
797   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
798                                       Location::RegisterLocation(EAX));
799   codegen->RecordPcInfo(invoke, invoke->GetDexPc());
800 
801   // Copy the result back to the expected output.
802   Location out = invoke->GetLocations()->Out();
803   if (out.IsValid()) {
804     DCHECK(out.IsRegister());
805     codegen->MoveFromReturnRegister(out, invoke->GetType());
806   }
807 }
808 
809 static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
810                                       HInvoke* invoke,
811                                       CodeGeneratorX86* codegen) {
812   // Do we have instruction support?
813   if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
814     CreateFPToFPLocations(arena, invoke);
815     return;
816   }
817 
818   // We have to fall back to a call to the intrinsic.
819   LocationSummary* locations = new (arena) LocationSummary(invoke,
820                                                            LocationSummary::kCallOnMainOnly);
821   InvokeRuntimeCallingConvention calling_convention;
822   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
823   locations->SetOut(Location::FpuRegisterLocation(XMM0));
824   // Needs to be EAX for the invoke.
825   locations->AddTemp(Location::RegisterLocation(EAX));
826 }
827 
828 static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
829                                    HInvoke* invoke,
830                                    X86Assembler* assembler,
831                                    int round_mode) {
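  // round_mode is the SSE4.1 rounding-control immediate: 0 rounds to nearest
  // (ties to even, used for rint), 1 rounds toward negative infinity (floor)
  // and 2 toward positive infinity (ceil).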
832   LocationSummary* locations = invoke->GetLocations();
833   if (locations->WillCall()) {
834     InvokeOutOfLineIntrinsic(codegen, invoke);
835   } else {
836     XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
837     XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
838     __ roundsd(out, in, Immediate(round_mode));
839   }
840 }
841 
842 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
843   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
844 }
845 
846 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
847   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
848 }
849 
850 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
851   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
852 }
853 
854 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
855   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
856 }
857 
858 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
859   CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
860 }
861 
862 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
863   GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
864 }
865 
866 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
867   // Do we have instruction support?
868   if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
869     HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
870     DCHECK(static_or_direct != nullptr);
871     LocationSummary* locations = new (arena_) LocationSummary(invoke,
872                                                               LocationSummary::kNoCall,
873                                                               kIntrinsified);
874     locations->SetInAt(0, Location::RequiresFpuRegister());
875     if (static_or_direct->HasSpecialInput() &&
876         invoke->InputAt(
877             static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
878       locations->SetInAt(1, Location::RequiresRegister());
879     }
880     locations->SetOut(Location::RequiresRegister());
881     locations->AddTemp(Location::RequiresFpuRegister());
882     locations->AddTemp(Location::RequiresFpuRegister());
883     return;
884   }
885 
886   // We have to fall back to a call to the intrinsic.
887   LocationSummary* locations = new (arena_) LocationSummary(invoke,
888                                                             LocationSummary::kCallOnMainOnly);
889   InvokeRuntimeCallingConvention calling_convention;
890   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
891   locations->SetOut(Location::RegisterLocation(EAX));
892   // Needs to be EAX for the invoke.
893   locations->AddTemp(Location::RegisterLocation(EAX));
894 }
895 
896 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
897   LocationSummary* locations = invoke->GetLocations();
898   if (locations->WillCall()) {  // TODO: can we reach this?
899     InvokeOutOfLineIntrinsic(codegen_, invoke);
900     return;
901   }
902 
903   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
904   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
905   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
906   Register out = locations->Out().AsRegister<Register>();
907   NearLabel skip_incr, done;
908   X86Assembler* assembler = GetAssembler();
909 
910   // Since no direct x86 rounding instruction matches the required semantics,
911   // this intrinsic is implemented as follows:
912   //  result = floor(in);
913   //  if (in - result >= 0.5f)
914   //    result = result + 1.0f;
915   __ movss(t2, in);
916   __ roundss(t1, in, Immediate(1));
917   __ subss(t2, t1);
918   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
919     // Direct constant area available.
920     HX86ComputeBaseMethodAddress* method_address =
921         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
922     Register constant_area = locations->InAt(1).AsRegister<Register>();
923     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
924                                                 method_address,
925                                                 constant_area));
926     __ j(kBelow, &skip_incr);
927     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
928                                                method_address,
929                                                constant_area));
930     __ Bind(&skip_incr);
931   } else {
932     // No constant area: go through stack.
933     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
934     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
935     __ comiss(t2, Address(ESP, 4));
936     __ j(kBelow, &skip_incr);
937     __ addss(t1, Address(ESP, 0));
938     __ Bind(&skip_incr);
939     __ addl(ESP, Immediate(8));
940   }
941 
942   // Final conversion to an integer. Unfortunately this also does not have a
943   // direct x86 instruction, since NaN should map to 0 and large positive
944   // values need to be clipped to the extreme value.
945   __ movl(out, Immediate(kPrimIntMax));
946   __ cvtsi2ss(t2, out);
947   __ comiss(t1, t2);
948   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
949   __ movl(out, Immediate(0));  // does not change flags
950   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
951   __ cvttss2si(out, t1);
952   __ Bind(&done);
953 }
954 
955 static void CreateFPToFPCallLocations(ArenaAllocator* arena,
956                                       HInvoke* invoke) {
957   LocationSummary* locations = new (arena) LocationSummary(invoke,
958                                                            LocationSummary::kCallOnMainOnly,
959                                                            kIntrinsified);
960   InvokeRuntimeCallingConvention calling_convention;
961   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
962   locations->SetOut(Location::FpuRegisterLocation(XMM0));
963 }
964 
965 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
966   LocationSummary* locations = invoke->GetLocations();
967   DCHECK(locations->WillCall());
968   DCHECK(invoke->IsInvokeStaticOrDirect());
969   X86Assembler* assembler = codegen->GetAssembler();
970 
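  // The quick math entrypoints use the native x86 calling convention: double
  // arguments are read from the stack and the result comes back on the x87 FP
  // stack, which is why it is spilled with fstpl and reloaded into XMM0 below.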
971   // We need some place to pass the parameters.
972   __ subl(ESP, Immediate(16));
973   __ cfi().AdjustCFAOffset(16);
974 
975   // Pass the parameters at the bottom of the stack.
976   __ movsd(Address(ESP, 0), XMM0);
977 
978   // If we have a second parameter, pass it next.
979   if (invoke->GetNumberOfArguments() == 2) {
980     __ movsd(Address(ESP, 8), XMM1);
981   }
982 
983   // Now do the actual call.
984   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
985 
986   // Extract the return value from the FP stack.
987   __ fstpl(Address(ESP, 0));
988   __ movsd(XMM0, Address(ESP, 0));
989 
990   // And clean up the stack.
991   __ addl(ESP, Immediate(16));
992   __ cfi().AdjustCFAOffset(-16);
993 }
994 
995 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
996   CreateFPToFPCallLocations(arena_, invoke);
997 }
998 
999 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
1000   GenFPToFPCall(invoke, codegen_, kQuickCos);
1001 }
1002 
1003 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
1004   CreateFPToFPCallLocations(arena_, invoke);
1005 }
1006 
1007 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
1008   GenFPToFPCall(invoke, codegen_, kQuickSin);
1009 }
1010 
1011 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
1012   CreateFPToFPCallLocations(arena_, invoke);
1013 }
1014 
1015 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
1016   GenFPToFPCall(invoke, codegen_, kQuickAcos);
1017 }
1018 
1019 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
1020   CreateFPToFPCallLocations(arena_, invoke);
1021 }
1022 
1023 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
1024   GenFPToFPCall(invoke, codegen_, kQuickAsin);
1025 }
1026 
1027 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
1028   CreateFPToFPCallLocations(arena_, invoke);
1029 }
1030 
1031 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
1032   GenFPToFPCall(invoke, codegen_, kQuickAtan);
1033 }
1034 
1035 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
1036   CreateFPToFPCallLocations(arena_, invoke);
1037 }
1038 
1039 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
1040   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1041 }
1042 
1043 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
1044   CreateFPToFPCallLocations(arena_, invoke);
1045 }
1046 
1047 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
1048   GenFPToFPCall(invoke, codegen_, kQuickCosh);
1049 }
1050 
1051 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
1052   CreateFPToFPCallLocations(arena_, invoke);
1053 }
1054 
1055 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
1056   GenFPToFPCall(invoke, codegen_, kQuickExp);
1057 }
1058 
1059 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
1060   CreateFPToFPCallLocations(arena_, invoke);
1061 }
1062 
1063 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
1064   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1065 }
1066 
1067 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
1068   CreateFPToFPCallLocations(arena_, invoke);
1069 }
1070 
1071 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
1072   GenFPToFPCall(invoke, codegen_, kQuickLog);
1073 }
1074 
1075 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
1076   CreateFPToFPCallLocations(arena_, invoke);
1077 }
1078 
1079 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
1080   GenFPToFPCall(invoke, codegen_, kQuickLog10);
1081 }
1082 
1083 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
1084   CreateFPToFPCallLocations(arena_, invoke);
1085 }
1086 
1087 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
1088   GenFPToFPCall(invoke, codegen_, kQuickSinh);
1089 }
1090 
1091 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
1092   CreateFPToFPCallLocations(arena_, invoke);
1093 }
1094 
1095 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
1096   GenFPToFPCall(invoke, codegen_, kQuickTan);
1097 }
1098 
1099 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
1100   CreateFPToFPCallLocations(arena_, invoke);
1101 }
1102 
1103 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
1104   GenFPToFPCall(invoke, codegen_, kQuickTanh);
1105 }
1106 
1107 static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
1108                                         HInvoke* invoke) {
1109   LocationSummary* locations = new (arena) LocationSummary(invoke,
1110                                                            LocationSummary::kCallOnMainOnly,
1111                                                            kIntrinsified);
1112   InvokeRuntimeCallingConvention calling_convention;
1113   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
1114   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
1115   locations->SetOut(Location::FpuRegisterLocation(XMM0));
1116 }
1117 
1118 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
1119   CreateFPFPToFPCallLocations(arena_, invoke);
1120 }
1121 
1122 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
1123   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1124 }
1125 
1126 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
1127   CreateFPFPToFPCallLocations(arena_, invoke);
1128 }
1129 
1130 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
1131   GenFPToFPCall(invoke, codegen_, kQuickHypot);
1132 }
1133 
1134 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
1135   CreateFPFPToFPCallLocations(arena_, invoke);
1136 }
1137 
1138 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
1139   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1140 }
1141 
1142 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
1143   // We need at least two of the positions or length to be an integer constant,
1144   // or else we won't have enough free registers.
1145   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1146   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1147   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1148 
1149   int num_constants =
1150       ((src_pos != nullptr) ? 1 : 0)
1151       + ((dest_pos != nullptr) ? 1 : 0)
1152       + ((length != nullptr) ? 1 : 0);
1153 
1154   if (num_constants < 2) {
1155     // Not enough free registers.
1156     return;
1157   }
1158 
1159   // As long as we are checking, we might as well check to see if the src and dest
1160   // positions are >= 0.
1161   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
1162       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
1163     // We will have to fail anyways.
1164     return;
1165   }
1166 
1167   // And since we are already checking, check the length too.
1168   if (length != nullptr) {
1169     int32_t len = length->GetValue();
1170     if (len < 0) {
1171       // Just call as normal.
1172       return;
1173     }
1174   }
1175 
1176   // Okay, it is safe to generate inline code.
1177   LocationSummary* locations =
1178     new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
1179   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
1180   locations->SetInAt(0, Location::RequiresRegister());
1181   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1182   locations->SetInAt(2, Location::RequiresRegister());
1183   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
1184   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
1185 
1186   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1187   locations->AddTemp(Location::RegisterLocation(ESI));
1188   locations->AddTemp(Location::RegisterLocation(EDI));
1189   locations->AddTemp(Location::RegisterLocation(ECX));
1190 }
1191 
1192 static void CheckPosition(X86Assembler* assembler,
1193                           Location pos,
1194                           Register input,
1195                           Location length,
1196                           SlowPathCode* slow_path,
1197                           Register temp,
1198                           bool length_is_input_length = false) {
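  // Checks that 0 <= pos, pos <= input.length and input.length - pos >= length,
  // branching to the slow path otherwise. When length_is_input_length is true,
  // the caller copies the whole array, so only pos == 0 can succeed.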
1199   // Where is the length in the Array?
1200   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1201 
1202   if (pos.IsConstant()) {
1203     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
1204     if (pos_const == 0) {
1205       if (!length_is_input_length) {
1206         // Check that length(input) >= length.
1207         if (length.IsConstant()) {
1208           __ cmpl(Address(input, length_offset),
1209                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1210         } else {
1211           __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
1212         }
1213         __ j(kLess, slow_path->GetEntryLabel());
1214       }
1215     } else {
1216       // Check that length(input) >= pos.
1217       __ movl(temp, Address(input, length_offset));
1218       __ subl(temp, Immediate(pos_const));
1219       __ j(kLess, slow_path->GetEntryLabel());
1220 
1221       // Check that (length(input) - pos) >= length.
1222       if (length.IsConstant()) {
1223         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1224       } else {
1225         __ cmpl(temp, length.AsRegister<Register>());
1226       }
1227       __ j(kLess, slow_path->GetEntryLabel());
1228     }
1229   } else if (length_is_input_length) {
1230     // The only way the copy can succeed is if pos is zero.
1231     Register pos_reg = pos.AsRegister<Register>();
1232     __ testl(pos_reg, pos_reg);
1233     __ j(kNotEqual, slow_path->GetEntryLabel());
1234   } else {
1235     // Check that pos >= 0.
1236     Register pos_reg = pos.AsRegister<Register>();
1237     __ testl(pos_reg, pos_reg);
1238     __ j(kLess, slow_path->GetEntryLabel());
1239 
1240     // Check that pos <= length(input).
1241     __ cmpl(Address(input, length_offset), pos_reg);
1242     __ j(kLess, slow_path->GetEntryLabel());
1243 
1244     // Check that (length(input) - pos) >= length.
1245     __ movl(temp, Address(input, length_offset));
1246     __ subl(temp, pos_reg);
1247     if (length.IsConstant()) {
1248       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1249     } else {
1250       __ cmpl(temp, length.AsRegister<Register>());
1251     }
1252     __ j(kLess, slow_path->GetEntryLabel());
1253   }
1254 }
1255 
1256 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
1257   X86Assembler* assembler = GetAssembler();
1258   LocationSummary* locations = invoke->GetLocations();
1259 
1260   Register src = locations->InAt(0).AsRegister<Register>();
1261   Location srcPos = locations->InAt(1);
1262   Register dest = locations->InAt(2).AsRegister<Register>();
1263   Location destPos = locations->InAt(3);
1264   Location length = locations->InAt(4);
1265 
1266   // Temporaries that we need for MOVSW.
1267   Register src_base = locations->GetTemp(0).AsRegister<Register>();
1268   DCHECK_EQ(src_base, ESI);
1269   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
1270   DCHECK_EQ(dest_base, EDI);
1271   Register count = locations->GetTemp(2).AsRegister<Register>();
1272   DCHECK_EQ(count, ECX);
1273 
1274   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1275   codegen_->AddSlowPath(slow_path);
1276 
1277   // Bail out if the source and destination are the same (to handle overlap).
1278   __ cmpl(src, dest);
1279   __ j(kEqual, slow_path->GetEntryLabel());
1280 
1281   // Bail out if the source is null.
1282   __ testl(src, src);
1283   __ j(kEqual, slow_path->GetEntryLabel());
1284 
1285   // Bail out if the destination is null.
1286   __ testl(dest, dest);
1287   __ j(kEqual, slow_path->GetEntryLabel());
1288 
1289   // If the length is negative, bail out.
1290   // We have already checked in the LocationsBuilder for the constant case.
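  // (testl of a register with itself sets SF from the value's sign bit, so the kLess branch
  // below is taken exactly when the length is negative.)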
1291   if (!length.IsConstant()) {
1292     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
1293     __ j(kLess, slow_path->GetEntryLabel());
1294   }
1295 
1296   // We need the count in ECX.
1297   if (length.IsConstant()) {
1298     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1299   } else {
1300     __ movl(count, length.AsRegister<Register>());
1301   }
1302 
1303   // Validity checks: source. Use src_base as a temporary register.
1304   CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
1305 
1306   // Validity checks: dest. Use src_base as a temporary register.
1307   CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
1308 
1309   // Okay, everything checks out.  Finally time to do the copy.
1310   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1311   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1312   DCHECK_EQ(char_size, 2u);
1313 
1314   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1315 
1316   if (srcPos.IsConstant()) {
1317     int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
1318     __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
1319   } else {
1320     __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
1321                               ScaleFactor::TIMES_2, data_offset));
1322   }
1323   if (destPos.IsConstant()) {
1324     int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
1325 
1326     __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
1327   } else {
1328     __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
1329                                ScaleFactor::TIMES_2, data_offset));
1330   }
1331 
1332   // Do the move.
1333   __ rep_movsw();
1334 
1335   __ Bind(slow_path->GetExitLabel());
1336 }
1337 
1338 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1339   // The inputs plus one temp.
1340   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1341                                                             LocationSummary::kCallOnMainAndSlowPath,
1342                                                             kIntrinsified);
1343   InvokeRuntimeCallingConvention calling_convention;
1344   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1345   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1346   locations->SetOut(Location::RegisterLocation(EAX));
1347 }
1348 
1349 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1350   X86Assembler* assembler = GetAssembler();
1351   LocationSummary* locations = invoke->GetLocations();
1352 
1353   // Note that the null check must have been done earlier.
1354   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1355 
1356   Register argument = locations->InAt(1).AsRegister<Register>();
1357   __ testl(argument, argument);
1358   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1359   codegen_->AddSlowPath(slow_path);
1360   __ j(kEqual, slow_path->GetEntryLabel());
1361 
1362   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1363   __ Bind(slow_path->GetExitLabel());
1364 }
1365 
1366 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1367   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1368                                                             LocationSummary::kNoCall,
1369                                                             kIntrinsified);
1370   locations->SetInAt(0, Location::RequiresRegister());
1371   locations->SetInAt(1, Location::RequiresRegister());
1372 
1373   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1374   locations->AddTemp(Location::RegisterLocation(ECX));
1375   locations->AddTemp(Location::RegisterLocation(EDI));
1376 
1377   // Set the output; ESI is needed for the repe_cmpsl instruction anyway.
1378   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1379 }
1380 
1381 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1382   X86Assembler* assembler = GetAssembler();
1383   LocationSummary* locations = invoke->GetLocations();
1384 
1385   Register str = locations->InAt(0).AsRegister<Register>();
1386   Register arg = locations->InAt(1).AsRegister<Register>();
1387   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1388   Register edi = locations->GetTemp(1).AsRegister<Register>();
1389   Register esi = locations->Out().AsRegister<Register>();
1390 
1391   NearLabel end, return_true, return_false;
1392 
1393   // Get offsets of count, value, and class fields within a string object.
1394   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1395   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1396   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1397 
1398   // Note that the null check must have been done earlier.
1399   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1400 
1401   StringEqualsOptimizations optimizations(invoke);
1402   if (!optimizations.GetArgumentNotNull()) {
1403     // Check if input is null, return false if it is.
1404     __ testl(arg, arg);
1405     __ j(kEqual, &return_false);
1406   }
1407 
1408   if (!optimizations.GetArgumentIsString()) {
1409     // Instanceof check for the argument by comparing class fields.
1410     // All string objects must have the same type since String cannot be subclassed.
1411     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1412     // If the argument is a string object, its class field must be equal to receiver's class field.
1413     __ movl(ecx, Address(str, class_offset));
1414     __ cmpl(ecx, Address(arg, class_offset));
1415     __ j(kNotEqual, &return_false);
1416   }
1417 
1418   // Reference equality check, return true if same reference.
1419   __ cmpl(str, arg);
1420   __ j(kEqual, &return_true);
1421 
1422   // Load length and compression flag of receiver string.
1423   __ movl(ecx, Address(str, count_offset));
1424   // Check if lengths and compression flags are equal, return false if they're not.
1425   // Two identical strings will always have same compression style since
1426   // compression style is decided on alloc.
1427   __ cmpl(ecx, Address(arg, count_offset));
1428   __ j(kNotEqual, &return_false);
1429   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1430   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1431                 "Expecting 0=compressed, 1=uncompressed");
1432   __ jecxz(&return_true);
1433 
1434   if (mirror::kUseStringCompression) {
1435     NearLabel string_uncompressed;
1436     // Extract the length. At this point both strings are compressed or both are
1437     // uncompressed; mismatched compression styles were already rejected above.
1438     __ shrl(ecx, Immediate(1));
1439     __ j(kCarrySet, &string_uncompressed);
1440     // Divide string length by 2, rounding up, and continue as if uncompressed.
1441     __ addl(ecx, Immediate(1));
1442     __ shrl(ecx, Immediate(1));
1443     __ Bind(&string_uncompressed);
1444   }
1445   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1446   __ leal(esi, Address(str, value_offset));
1447   __ leal(edi, Address(arg, value_offset));
1448 
1449   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1450   // divisible by 2.
1451   __ addl(ecx, Immediate(1));
1452   __ shrl(ecx, Immediate(1));
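  // For example, an uncompressed 5-char string gives ECX = (5 + 1) / 2 = 3 dword compares
  // (6 chars including the padding slot), while a compressed 5-char string was already
  // rounded up to 3 above and now gives ECX = 2 dword compares (8 bytes including padding);
  // the comparison relies on the trailing slot being zero-padded, per the assertions below.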
1453 
1454   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1455   // or 4 characters (compressed) at a time.
1456   DCHECK_ALIGNED(value_offset, 4);
1457   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1458 
1459   // Loop to compare strings two characters at a time starting at the beginning of the string.
1460   __ repe_cmpsl();
1461   // If strings are not equal, zero flag will be cleared.
1462   __ j(kNotEqual, &return_false);
1463 
1464   // Return true and exit the function.
1465   // If loop does not result in returning false, we return true.
1466   __ Bind(&return_true);
1467   __ movl(esi, Immediate(1));
1468   __ jmp(&end);
1469 
1470   // Return false and exit the function.
1471   __ Bind(&return_false);
1472   __ xorl(esi, esi);
1473   __ Bind(&end);
1474 }
1475 
1476 static void CreateStringIndexOfLocations(HInvoke* invoke,
1477                                          ArenaAllocator* allocator,
1478                                          bool start_at_zero) {
1479   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1480                                                                LocationSummary::kCallOnSlowPath,
1481                                                                kIntrinsified);
1482   // The data needs to be in EDI for scasw. So request that the string is there, anyway.
1483   locations->SetInAt(0, Location::RegisterLocation(EDI));
1484   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1485   // allocator to do that, anyway. We can still do the constant check by checking the parameter
1486   // of the instruction explicitly.
1487   // Note: This works as we don't clobber EAX anywhere.
1488   locations->SetInAt(1, Location::RegisterLocation(EAX));
1489   if (!start_at_zero) {
1490     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1491   }
1492   // As we clobber EDI during execution anyway, also use it as the output.
1493   locations->SetOut(Location::SameAsFirstInput());
1494 
1495   // repne scasw uses ECX as the counter.
1496   locations->AddTemp(Location::RegisterLocation(ECX));
1497   // Need another temporary to be able to compute the result.
1498   locations->AddTemp(Location::RequiresRegister());
1499   if (mirror::kUseStringCompression) {
1500     // Need another temporary to be able to save unflagged string length.
1501     locations->AddTemp(Location::RequiresRegister());
1502   }
1503 }
1504 
1505 static void GenerateStringIndexOf(HInvoke* invoke,
1506                                   X86Assembler* assembler,
1507                                   CodeGeneratorX86* codegen,
1508                                   ArenaAllocator* allocator,
1509                                   bool start_at_zero) {
1510   LocationSummary* locations = invoke->GetLocations();
1511 
1512   // Note that the null check must have been done earlier.
1513   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1514 
1515   Register string_obj = locations->InAt(0).AsRegister<Register>();
1516   Register search_value = locations->InAt(1).AsRegister<Register>();
1517   Register counter = locations->GetTemp(0).AsRegister<Register>();
1518   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1519   Register out = locations->Out().AsRegister<Register>();
1520   // Only used when string compression feature is on.
1521   Register string_length_flagged;
1522 
1523   // Check our assumptions for registers.
1524   DCHECK_EQ(string_obj, EDI);
1525   DCHECK_EQ(search_value, EAX);
1526   DCHECK_EQ(counter, ECX);
1527   DCHECK_EQ(out, EDI);
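  // This layout matches the implicit operands of REPNE SCASW: it compares AX with the word
  // at [EDI], advances EDI by 2 and decrements ECX after every compare, and stops when a
  // match is found (ZF set) or ECX reaches zero.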
1528 
1529   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1530   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1531   SlowPathCode* slow_path = nullptr;
1532   HInstruction* code_point = invoke->InputAt(1);
1533   if (code_point->IsIntConstant()) {
1534     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1535     std::numeric_limits<uint16_t>::max()) {
1536       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1537       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1538       slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1539       codegen->AddSlowPath(slow_path);
1540       __ jmp(slow_path->GetEntryLabel());
1541       __ Bind(slow_path->GetExitLabel());
1542       return;
1543     }
1544   } else if (code_point->GetType() != Primitive::kPrimChar) {
1545     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1546     slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1547     codegen->AddSlowPath(slow_path);
1548     __ j(kAbove, slow_path->GetEntryLabel());
1549   }
1550 
1551   // From here down, we know that we are looking for a char that fits in 16 bits.
1552   // Location of reference to data array within the String object.
1553   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1554   // Location of count within the String object.
1555   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1556 
1557   // Load the count field of the string containing the length and compression flag.
1558   __ movl(string_length, Address(string_obj, count_offset));
1559 
1560   // Do a zero-length check. Even with string compression `count == 0` means empty.
1561   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1562                 "Expecting 0=compressed, 1=uncompressed");
1563   // TODO: Support jecxz.
1564   NearLabel not_found_label;
1565   __ testl(string_length, string_length);
1566   __ j(kEqual, &not_found_label);
1567 
1568   if (mirror::kUseStringCompression) {
1569     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1570     __ movl(string_length_flagged, string_length);
1571     // Extract the length and shift out the least significant bit used as compression flag.
1572     __ shrl(string_length, Immediate(1));
1573   }
1574 
1575   if (start_at_zero) {
1576     // Number of chars to scan is the same as the string length.
1577     __ movl(counter, string_length);
1578 
1579     // Move to the start of the string.
1580     __ addl(string_obj, Immediate(value_offset));
1581   } else {
1582     Register start_index = locations->InAt(2).AsRegister<Register>();
1583 
1584     // Do a start_index check.
1585     __ cmpl(start_index, string_length);
1586     __ j(kGreaterEqual, &not_found_label);
1587 
1588     // Ensure we have a start index >= 0.
1589     __ xorl(counter, counter);
1590     __ cmpl(start_index, Immediate(0));
1591     __ cmovl(kGreater, counter, start_index);
1592 
1593     if (mirror::kUseStringCompression) {
1594       NearLabel modify_counter, offset_uncompressed_label;
1595       __ testl(string_length_flagged, Immediate(1));
1596       __ j(kNotZero, &offset_uncompressed_label);
1597       // Move to the start of the string: string_obj + value_offset + start_index.
1598       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1599       __ jmp(&modify_counter);
1600 
1601       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1602       __ Bind(&offset_uncompressed_label);
1603       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1604 
1605       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1606       // compare.
1607       __ Bind(&modify_counter);
1608     } else {
1609       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1610     }
1611     __ negl(counter);
1612     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1613   }
1614 
1615   if (mirror::kUseStringCompression) {
1616     NearLabel uncompressed_string_comparison;
1617     NearLabel comparison_done;
1618     __ testl(string_length_flagged, Immediate(1));
1619     __ j(kNotZero, &uncompressed_string_comparison);
1620 
1621     // Check if EAX (search_value) is ASCII.
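    // A compressed string stores only ASCII (<= 0x7F) bytes, so a search value above 127
    // can never match and we can report "not found" without scanning.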
1622     __ cmpl(search_value, Immediate(127));
1623     __ j(kGreater, &not_found_label);
1624     // Comparing byte-per-byte.
1625     __ repne_scasb();
1626     __ jmp(&comparison_done);
1627 
1628     // Everything is set up for repne scasw:
1629     //   * Comparison address in EDI.
1630     //   * Counter in ECX.
1631     __ Bind(&uncompressed_string_comparison);
1632     __ repne_scasw();
1633     __ Bind(&comparison_done);
1634   } else {
1635     __ repne_scasw();
1636   }
1637   // Did we find a match?
1638   __ j(kNotEqual, &not_found_label);
1639 
1640   // Yes, we matched.  Compute the index of the result.
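  // ECX holds the number of words that remained when the scan stopped, so
  // string_length - ECX is one past the matching index regardless of the starting offset.
  // E.g. length 10, start index 4, match at index 7: the scan begins with ECX = 6, stops
  // with ECX = 2, and 10 - 2 - 1 = 7.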
1641   __ subl(string_length, counter);
1642   __ leal(out, Address(string_length, -1));
1643 
1644   NearLabel done;
1645   __ jmp(&done);
1646 
1647   // Failed to match; return -1.
1648   __ Bind(&not_found_label);
1649   __ movl(out, Immediate(-1));
1650 
1651   // And join up at the end.
1652   __ Bind(&done);
1653   if (slow_path != nullptr) {
1654     __ Bind(slow_path->GetExitLabel());
1655   }
1656 }
1657 
1658 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1659   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1660 }
1661 
1662 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1663   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1664 }
1665 
1666 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1667   CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1668 }
1669 
1670 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1671   GenerateStringIndexOf(
1672       invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1673 }
1674 
1675 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1676   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1677                                                             LocationSummary::kCallOnMainAndSlowPath,
1678                                                             kIntrinsified);
1679   InvokeRuntimeCallingConvention calling_convention;
1680   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1681   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1682   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1683   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1684   locations->SetOut(Location::RegisterLocation(EAX));
1685 }
1686 
1687 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1688   X86Assembler* assembler = GetAssembler();
1689   LocationSummary* locations = invoke->GetLocations();
1690 
1691   Register byte_array = locations->InAt(0).AsRegister<Register>();
1692   __ testl(byte_array, byte_array);
1693   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1694   codegen_->AddSlowPath(slow_path);
1695   __ j(kEqual, slow_path->GetEntryLabel());
1696 
1697   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1698   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1699   __ Bind(slow_path->GetExitLabel());
1700 }
1701 
1702 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1703   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1704                                                             LocationSummary::kCallOnMainOnly,
1705                                                             kIntrinsified);
1706   InvokeRuntimeCallingConvention calling_convention;
1707   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1708   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1709   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1710   locations->SetOut(Location::RegisterLocation(EAX));
1711 }
1712 
1713 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1714   // No need to emit code checking whether `locations->InAt(2)` is a null
1715   // pointer, as callers of the native method
1716   //
1717   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1718   //
1719   // all include a null check on `data` before calling that method.
1720   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1721   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1722 }
1723 
1724 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1725   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1726                                                             LocationSummary::kCallOnMainAndSlowPath,
1727                                                             kIntrinsified);
1728   InvokeRuntimeCallingConvention calling_convention;
1729   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1730   locations->SetOut(Location::RegisterLocation(EAX));
1731 }
1732 
1733 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1734   X86Assembler* assembler = GetAssembler();
1735   LocationSummary* locations = invoke->GetLocations();
1736 
1737   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1738   __ testl(string_to_copy, string_to_copy);
1739   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1740   codegen_->AddSlowPath(slow_path);
1741   __ j(kEqual, slow_path->GetEntryLabel());
1742 
1743   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1744   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1745   __ Bind(slow_path->GetExitLabel());
1746 }
1747 
1748 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1749   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1750   LocationSummary* locations = new (arena_) LocationSummary(invoke,
1751                                                             LocationSummary::kNoCall,
1752                                                             kIntrinsified);
1753   locations->SetInAt(0, Location::RequiresRegister());
1754   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1755   // Place srcEnd in ECX to save a move below.
1756   locations->SetInAt(2, Location::RegisterLocation(ECX));
1757   locations->SetInAt(3, Location::RequiresRegister());
1758   locations->SetInAt(4, Location::RequiresRegister());
1759 
1760   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1761   // We don't have enough registers to also grab ECX, so handle below.
1762   locations->AddTemp(Location::RegisterLocation(ESI));
1763   locations->AddTemp(Location::RegisterLocation(EDI));
1764 }
1765 
1766 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1767   X86Assembler* assembler = GetAssembler();
1768   LocationSummary* locations = invoke->GetLocations();
1769 
1770   size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1771   // Location of data in char array buffer.
1772   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1773   // Location of char array data in string.
1774   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1775 
1776   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1777   Register obj = locations->InAt(0).AsRegister<Register>();
1778   Location srcBegin = locations->InAt(1);
1779   int srcBegin_value =
1780     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1781   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1782   Register dst = locations->InAt(3).AsRegister<Register>();
1783   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1784 
1785   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1786   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1787   DCHECK_EQ(char_size, 2u);
1788 
1789   // Compute the number of chars (words) to move.
1790   // Save ECX, since we don't know if it will be used later.
1791   __ pushl(ECX);
1792   int stack_adjust = kX86WordSize;
1793   __ cfi().AdjustCFAOffset(stack_adjust);
1794   DCHECK_EQ(srcEnd, ECX);
1795   if (srcBegin.IsConstant()) {
1796     __ subl(ECX, Immediate(srcBegin_value));
1797   } else {
1798     DCHECK(srcBegin.IsRegister());
1799     __ subl(ECX, srcBegin.AsRegister<Register>());
1800   }
1801 
1802   NearLabel done;
1803   if (mirror::kUseStringCompression) {
1804     // Location of count in string
1805     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1806     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1807     DCHECK_EQ(c_char_size, 1u);
1808     __ pushl(EAX);
1809     __ cfi().AdjustCFAOffset(stack_adjust);
1810 
1811     NearLabel copy_loop, copy_uncompressed;
1812     __ testl(Address(obj, count_offset), Immediate(1));
1813     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1814                   "Expecting 0=compressed, 1=uncompressed");
1815     __ j(kNotZero, &copy_uncompressed);
1816     // Compute the address of the source string by adding the number of chars from
1817     // the source beginning to the value offset of a string.
1818     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1819 
1820     // Start the loop to copy String's value to Array of Char.
1821     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1822     __ Bind(&copy_loop);
1823     __ jecxz(&done);
1824     // Use EAX temporary (convert byte from ESI to word).
1825     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1826     __ movzxb(EAX, Address(ESI, 0));
1827     __ movw(Address(EDI, 0), EAX);
1828     __ leal(EDI, Address(EDI, char_size));
1829     __ leal(ESI, Address(ESI, c_char_size));
1830     // TODO: Add support for LOOP to X86Assembler.
1831     __ subl(ECX, Immediate(1));
1832     __ jmp(&copy_loop);
1833     __ Bind(&copy_uncompressed);
1834   }
1835 
1836   // Do the copy for uncompressed string.
1837   // Compute the address of the destination buffer.
1838   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1839   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1840   __ rep_movsw();
1841 
1842   __ Bind(&done);
1843   if (mirror::kUseStringCompression) {
1844     // Restore EAX.
1845     __ popl(EAX);
1846     __ cfi().AdjustCFAOffset(-stack_adjust);
1847   }
1848   // Restore ECX.
1849   __ popl(ECX);
1850   __ cfi().AdjustCFAOffset(-stack_adjust);
1851 }
1852 
1853 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1854   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1855   Location out_loc = locations->Out();
1856   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1857   // to avoid a SIGBUS.
1858   switch (size) {
1859     case Primitive::kPrimByte:
1860       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1861       break;
1862     case Primitive::kPrimShort:
1863       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1864       break;
1865     case Primitive::kPrimInt:
1866       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1867       break;
1868     case Primitive::kPrimLong:
1869       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1870       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1871       break;
1872     default:
1873       LOG(FATAL) << "Type not recognized for peek: " << size;
1874       UNREACHABLE();
1875   }
1876 }
1877 
1878 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1879   CreateLongToIntLocations(arena_, invoke);
1880 }
1881 
1882 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1883   GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1884 }
1885 
1886 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1887   CreateLongToIntLocations(arena_, invoke);
1888 }
1889 
1890 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1891   GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1892 }
1893 
1894 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1895   CreateLongToLongLocations(arena_, invoke);
1896 }
1897 
1898 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1899   GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1900 }
1901 
1902 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1903   CreateLongToIntLocations(arena_, invoke);
1904 }
1905 
1906 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1907   GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1908 }
1909 
1910 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1911                                          HInvoke* invoke) {
1912   LocationSummary* locations = new (arena) LocationSummary(invoke,
1913                                                            LocationSummary::kNoCall,
1914                                                            kIntrinsified);
1915   locations->SetInAt(0, Location::RequiresRegister());
1916   HInstruction* value = invoke->InputAt(1);
1917   if (size == Primitive::kPrimByte) {
1918     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1919   } else {
1920     locations->SetInAt(1, Location::RegisterOrConstant(value));
1921   }
1922 }
1923 
1924 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1925   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1926   Location value_loc = locations->InAt(1);
1927   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1928   // to avoid a SIGBUS.
1929   switch (size) {
1930     case Primitive::kPrimByte:
1931       if (value_loc.IsConstant()) {
1932         __ movb(Address(address, 0),
1933                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1934       } else {
1935         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1936       }
1937       break;
1938     case Primitive::kPrimShort:
1939       if (value_loc.IsConstant()) {
1940         __ movw(Address(address, 0),
1941                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1942       } else {
1943         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1944       }
1945       break;
1946     case Primitive::kPrimInt:
1947       if (value_loc.IsConstant()) {
1948         __ movl(Address(address, 0),
1949                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1950       } else {
1951         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1952       }
1953       break;
1954     case Primitive::kPrimLong:
1955       if (value_loc.IsConstant()) {
1956         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
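        // x86 is little endian: the low word of the constant goes to [address] and the high
        // word to [address + 4]; e.g. poking 0x1122334455667788L stores 0x55667788 first and
        // then 0x11223344.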
1957         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1958         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1959       } else {
1960         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1961         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1962       }
1963       break;
1964     default:
1965       LOG(FATAL) << "Type not recognized for poke: " << size;
1966       UNREACHABLE();
1967   }
1968 }
1969 
1970 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1971   CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1972 }
1973 
1974 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1975   GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1976 }
1977 
1978 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1979   CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1980 }
1981 
1982 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1983   GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1984 }
1985 
1986 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1987   CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1988 }
1989 
1990 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1991   GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1992 }
1993 
1994 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1995   CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1996 }
1997 
1998 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1999   GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
2000 }
2001 
2002 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
2003   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2004                                                             LocationSummary::kNoCall,
2005                                                             kIntrinsified);
2006   locations->SetOut(Location::RequiresRegister());
2007 }
2008 
2009 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
2010   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
2011   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
2012 }
2013 
2014 static void GenUnsafeGet(HInvoke* invoke,
2015                          Primitive::Type type,
2016                          bool is_volatile,
2017                          CodeGeneratorX86* codegen) {
2018   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2019   LocationSummary* locations = invoke->GetLocations();
2020   Location base_loc = locations->InAt(1);
2021   Register base = base_loc.AsRegister<Register>();
2022   Location offset_loc = locations->InAt(2);
2023   Register offset = offset_loc.AsRegisterPairLow<Register>();
2024   Location output_loc = locations->Out();
2025 
2026   switch (type) {
2027     case Primitive::kPrimInt: {
2028       Register output = output_loc.AsRegister<Register>();
2029       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2030       break;
2031     }
2032 
2033     case Primitive::kPrimNot: {
2034       Register output = output_loc.AsRegister<Register>();
2035       if (kEmitCompilerReadBarrier) {
2036         if (kUseBakerReadBarrier) {
2037           Address src(base, offset, ScaleFactor::TIMES_1, 0);
2038           codegen->GenerateReferenceLoadWithBakerReadBarrier(
2039               invoke, output_loc, base, src, /* needs_null_check */ false);
2040         } else {
2041           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2042           codegen->GenerateReadBarrierSlow(
2043               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2044         }
2045       } else {
2046         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2047         __ MaybeUnpoisonHeapReference(output);
2048       }
2049       break;
2050     }
2051 
2052     case Primitive::kPrimLong: {
2053         Register output_lo = output_loc.AsRegisterPairLow<Register>();
2054         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
2055         if (is_volatile) {
2056           // Need to use an XMM register to read the long atomically.
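          // movsd performs a single 8-byte load, so the two halves of the long cannot tear,
          // assuming the field is 8-byte aligned (which the runtime arranges for long
          // fields); psrlq then shifts the high half into the low lane so it can be
          // extracted with movd.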
2057           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2058           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
2059           __ movd(output_lo, temp);
2060           __ psrlq(temp, Immediate(32));
2061           __ movd(output_hi, temp);
2062         } else {
2063           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
2064           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
2065         }
2066       }
2067       break;
2068 
2069     default:
2070       LOG(FATAL) << "Unsupported op size " << type;
2071       UNREACHABLE();
2072   }
2073 }
2074 
2075 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
2076                                           HInvoke* invoke,
2077                                           Primitive::Type type,
2078                                           bool is_volatile) {
2079   bool can_call = kEmitCompilerReadBarrier &&
2080       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2081        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2082   LocationSummary* locations = new (arena) LocationSummary(invoke,
2083                                                            (can_call
2084                                                                 ? LocationSummary::kCallOnSlowPath
2085                                                                 : LocationSummary::kNoCall),
2086                                                            kIntrinsified);
2087   if (can_call && kUseBakerReadBarrier) {
2088     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2089   }
2090   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2091   locations->SetInAt(1, Location::RequiresRegister());
2092   locations->SetInAt(2, Location::RequiresRegister());
2093   if (type == Primitive::kPrimLong) {
2094     if (is_volatile) {
2095       // Need an XMM temporary to read the volatile long atomically.
2096       locations->AddTemp(Location::RequiresFpuRegister());
2097       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2098     } else {
2099       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2100     }
2101   } else {
2102     locations->SetOut(Location::RequiresRegister(),
2103                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2104   }
2105 }
2106 
2107 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
2108   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
2109 }
2110 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
2111   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
2112 }
2113 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
2114   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
2115 }
2116 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2117   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
2118 }
2119 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
2120   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
2121 }
2122 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2123   CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
2124 }
2125 
2126 
2127 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
2128   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2129 }
2130 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
2131   GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2132 }
2133 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
2134   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2135 }
2136 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2137   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2138 }
2139 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
2140   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2141 }
2142 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2143   GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2144 }
2145 
2146 
2147 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2148                                                        Primitive::Type type,
2149                                                        HInvoke* invoke,
2150                                                        bool is_volatile) {
2151   LocationSummary* locations = new (arena) LocationSummary(invoke,
2152                                                            LocationSummary::kNoCall,
2153                                                            kIntrinsified);
2154   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2155   locations->SetInAt(1, Location::RequiresRegister());
2156   locations->SetInAt(2, Location::RequiresRegister());
2157   locations->SetInAt(3, Location::RequiresRegister());
2158   if (type == Primitive::kPrimNot) {
2159     // Need temp registers for card-marking.
2160     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2161     // Ensure the value is in a byte register.
2162     locations->AddTemp(Location::RegisterLocation(ECX));
2163   } else if (type == Primitive::kPrimLong && is_volatile) {
2164     locations->AddTemp(Location::RequiresFpuRegister());
2165     locations->AddTemp(Location::RequiresFpuRegister());
2166   }
2167 }
2168 
2169 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
2170   CreateIntIntIntIntToVoidPlusTempsLocations(
2171       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2172 }
2173 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2174   CreateIntIntIntIntToVoidPlusTempsLocations(
2175       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2176 }
2177 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2178   CreateIntIntIntIntToVoidPlusTempsLocations(
2179       arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
2180 }
2181 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2182   CreateIntIntIntIntToVoidPlusTempsLocations(
2183       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2184 }
2185 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2186   CreateIntIntIntIntToVoidPlusTempsLocations(
2187       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2188 }
2189 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2190   CreateIntIntIntIntToVoidPlusTempsLocations(
2191       arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
2192 }
2193 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2194   CreateIntIntIntIntToVoidPlusTempsLocations(
2195       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2196 }
2197 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2198   CreateIntIntIntIntToVoidPlusTempsLocations(
2199       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2200 }
2201 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2202   CreateIntIntIntIntToVoidPlusTempsLocations(
2203       arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
2204 }
2205 
2206 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2207 // memory model.
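// Volatile stores are the only case that needs an explicit fence: x86 already preserves
// Load/Store and Store/Store ordering, so codegen->MemoryFence() below only has to add the
// StoreLoad barrier required after a volatile write.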
2208 static void GenUnsafePut(LocationSummary* locations,
2209                          Primitive::Type type,
2210                          bool is_volatile,
2211                          CodeGeneratorX86* codegen) {
2212   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2213   Register base = locations->InAt(1).AsRegister<Register>();
2214   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2215   Location value_loc = locations->InAt(3);
2216 
2217   if (type == Primitive::kPrimLong) {
2218     Register value_lo = value_loc.AsRegisterPairLow<Register>();
2219     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2220     if (is_volatile) {
2221       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2222       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2223       __ movd(temp1, value_lo);
2224       __ movd(temp2, value_hi);
2225       __ punpckldq(temp1, temp2);
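      // After the interleave, the low 64 bits of temp1 are value_hi:value_lo, so the movsd
      // below publishes the whole long with a single 8-byte store.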
2226       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2227     } else {
2228       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2229       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2230     }
2231   } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2232     Register temp = locations->GetTemp(0).AsRegister<Register>();
2233     __ movl(temp, value_loc.AsRegister<Register>());
2234     __ PoisonHeapReference(temp);
2235     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2236   } else {
2237     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2238   }
2239 
2240   if (is_volatile) {
2241     codegen->MemoryFence();
2242   }
2243 
2244   if (type == Primitive::kPrimNot) {
2245     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2246     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2247                         locations->GetTemp(1).AsRegister<Register>(),
2248                         base,
2249                         value_loc.AsRegister<Register>(),
2250                         value_can_be_null);
2251   }
2252 }
2253 
2254 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2255   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2256 }
2257 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2258   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2259 }
2260 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2261   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2262 }
2263 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2264   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2265 }
2266 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2267   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2268 }
2269 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2270   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2271 }
2272 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2273   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2274 }
2275 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2276   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2277 }
2278 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2279   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2280 }
2281 
2282 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
2283                                        Primitive::Type type,
2284                                        HInvoke* invoke) {
2285   bool can_call = kEmitCompilerReadBarrier &&
2286       kUseBakerReadBarrier &&
2287       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2288   LocationSummary* locations = new (arena) LocationSummary(invoke,
2289                                                            (can_call
2290                                                                 ? LocationSummary::kCallOnSlowPath
2291                                                                 : LocationSummary::kNoCall),
2292                                                            kIntrinsified);
2293   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2294   locations->SetInAt(1, Location::RequiresRegister());
2295   // Offset is a long, but in 32 bit mode, we only need the low word.
2296   // Can we update the invoke here to remove a TypeConvert to Long?
2297   locations->SetInAt(2, Location::RequiresRegister());
2298   // Expected value must be in EAX or EDX:EAX.
2299   // For long, new value must be in ECX:EBX.
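  // These constraints mirror the fixed operands of LOCK CMPXCHG8B: it compares EDX:EAX with
  // the 64-bit memory operand and, on success, stores ECX:EBX into it, so no other register
  // assignment works for the long case.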
2300   if (type == Primitive::kPrimLong) {
2301     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2302     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2303   } else {
2304     locations->SetInAt(3, Location::RegisterLocation(EAX));
2305     locations->SetInAt(4, Location::RequiresRegister());
2306   }
2307 
2308   // Force a byte register for the output.
2309   locations->SetOut(Location::RegisterLocation(EAX));
2310   if (type == Primitive::kPrimNot) {
2311     // Need temporary registers for card-marking, and possibly for
2312     // (Baker) read barrier.
2313     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2314     // Need a byte register for marking.
2315     locations->AddTemp(Location::RegisterLocation(ECX));
2316   }
2317 }
2318 
2319 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2320   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2321 }
2322 
2323 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2324   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2325 }
2326 
2327 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2328   // The only read barrier implementation supporting the
2329   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2330   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2331     return;
2332   }
2333 
2334   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2335 }
2336 
2337 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2338   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2339   LocationSummary* locations = invoke->GetLocations();
2340 
2341   Register base = locations->InAt(1).AsRegister<Register>();
2342   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2343   Location out = locations->Out();
2344   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2345 
2346   // The address of the field within the holding object.
2347   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2348 
2349   if (type == Primitive::kPrimNot) {
2350     // The only read barrier implementation supporting the
2351     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2352     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2353 
2354     Location temp1_loc = locations->GetTemp(0);
2355     Register temp1 = temp1_loc.AsRegister<Register>();
2356     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2357 
2358     Register expected = locations->InAt(3).AsRegister<Register>();
2359     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
2360     DCHECK_EQ(expected, EAX);
2361     Register value = locations->InAt(4).AsRegister<Register>();
2362 
2363     // Mark card for object assuming new value is stored.
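    // (The card is dirtied before the CAS; if the CAS later fails, this is merely
    // conservative and safe, it only makes the next card scan do a little extra work.)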
2364     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2365     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2366 
2367     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2368       // Need to make sure the reference stored in the field is a to-space
2369       // one before attempting the CAS or the CAS could fail incorrectly.
2370       codegen->GenerateReferenceLoadWithBakerReadBarrier(
2371           invoke,
2372           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
2373           base,
2374           field_addr,
2375           /* needs_null_check */ false,
2376           /* always_update_field */ true,
2377           &temp2);
2378     }
2379 
2380     bool base_equals_value = (base == value);
2381     if (kPoisonHeapReferences) {
2382       if (base_equals_value) {
2383         // If `base` and `value` are the same register location, move
2384         // `value` to a temporary register.  This way, poisoning
2385         // `value` won't invalidate `base`.
2386         value = temp1;
2387         __ movl(value, base);
2388       }
2389 
2390       // Check that the register allocator did not assign the location
2391       // of `expected` (EAX) to `value` nor to `base`, so that heap
2392       // poisoning (when enabled) works as intended below.
2393       // - If `value` were equal to `expected`, both references would
2394       //   be poisoned twice, meaning they would not be poisoned at
2395       //   all, as heap poisoning uses address negation.
2396       // - If `base` were equal to `expected`, poisoning `expected`
2397       //   would invalidate `base`.
2398       DCHECK_NE(value, expected);
2399       DCHECK_NE(base, expected);
2400 
2401       __ PoisonHeapReference(expected);
2402       __ PoisonHeapReference(value);
2403     }
2404 
2405     __ LockCmpxchgl(field_addr, value);
2406 
2407     // LOCK CMPXCHG has full barrier semantics, and we don't need
2408     // scheduling barriers at this time.
2409 
2410     // Convert ZF into the Boolean result.
2411     __ setb(kZero, out.AsRegister<Register>());
2412     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2413 
2414     // If heap poisoning is enabled, we need to unpoison the values
2415     // that were poisoned earlier.
2416     if (kPoisonHeapReferences) {
2417       if (base_equals_value) {
2418         // `value` has been moved to a temporary register, no need to
2419         // unpoison it.
2420       } else {
2421         // Ensure `value` is different from `out`, so that unpoisoning
2422         // the former does not invalidate the latter.
2423         DCHECK_NE(value, out.AsRegister<Register>());
2424         __ UnpoisonHeapReference(value);
2425       }
2426       // Do not unpoison the reference contained in register
2427       // `expected`, as it is the same as register `out` (EAX).
2428     }
2429   } else {
2430     if (type == Primitive::kPrimInt) {
2431       // Ensure the expected value is in EAX (required by the CMPXCHG
2432       // instruction).
2433       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2434       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2435     } else if (type == Primitive::kPrimLong) {
2436       // Ensure the expected value is in EAX:EDX and that the new
2437       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2438       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2439       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2440       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2441       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2442       __ LockCmpxchg8b(field_addr);
2443     } else {
2444       LOG(FATAL) << "Unexpected CAS type " << type;
2445     }
2446 
2447     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2448     // don't need scheduling barriers at this time.
2449 
2450     // Convert ZF into the Boolean result.
2451     __ setb(kZero, out.AsRegister<Register>());
2452     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2453   }
2454 }
2455 
2456 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2457   GenCAS(Primitive::kPrimInt, invoke, codegen_);
2458 }
2459 
2460 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2461   GenCAS(Primitive::kPrimLong, invoke, codegen_);
2462 }
2463 
2464 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2465   // The only read barrier implementation supporting the
2466   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2467   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2468 
2469   GenCAS(Primitive::kPrimNot, invoke, codegen_);
2470 }
2471 
2472 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2473   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2474                                                            LocationSummary::kNoCall,
2475                                                            kIntrinsified);
2476   locations->SetInAt(0, Location::RequiresRegister());
2477   locations->SetOut(Location::SameAsFirstInput());
2478   locations->AddTemp(Location::RequiresRegister());
2479 }
2480 
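// Helper for Integer/Long reverse: replaces `reg` with
// ((reg >> shift) & mask) | ((reg & mask) << shift), using `temp` as scratch.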
2481 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2482                      X86Assembler* assembler) {
2483   Immediate imm_shift(shift);
2484   Immediate imm_mask(mask);
2485   __ movl(temp, reg);
2486   __ shrl(reg, imm_shift);
2487   __ andl(temp, imm_mask);
2488   __ andl(reg, imm_mask);
2489   __ shll(temp, imm_shift);
2490   __ orl(reg, temp);
2491 }
2492 
2493 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2494   X86Assembler* assembler = GetAssembler();
2495   LocationSummary* locations = invoke->GetLocations();
2496 
2497   Register reg = locations->InAt(0).AsRegister<Register>();
2498   Register temp = locations->GetTemp(0).AsRegister<Register>();
2499 
2500   /*
2501    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2502    * swapping bits to reverse the bits in a number x. Using bswap saves instructions
2503    * compared to the generic libcore (luni) implementation, which needs 5 rounds of swapping bits.
2504    * x = bswap x
2505    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2506    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2507    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2508    */
2509   __ bswapl(reg);
2510   SwapBits(reg, temp, 1, 0x55555555, assembler);
2511   SwapBits(reg, temp, 2, 0x33333333, assembler);
2512   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2513 }
2514 
2515 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2516   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2517                                                            LocationSummary::kNoCall,
2518                                                            kIntrinsified);
2519   locations->SetInAt(0, Location::RequiresRegister());
2520   locations->SetOut(Location::SameAsFirstInput());
2521   locations->AddTemp(Location::RequiresRegister());
2522 }
2523 
2524 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2525   X86Assembler* assembler = GetAssembler();
2526   LocationSummary* locations = invoke->GetLocations();
2527 
2528   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2529   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2530   Register temp = locations->GetTemp(0).AsRegister<Register>();
2531 
2532   // We want to swap high/low, then bswap each half, and then do the same
2533   // as a 32-bit reverse.
2534   // Exchange high and low.
2535   __ movl(temp, reg_low);
2536   __ movl(reg_low, reg_high);
2537   __ movl(reg_high, temp);
2538 
2539   // bit-reverse low
2540   __ bswapl(reg_low);
2541   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2542   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2543   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2544 
2545   // bit-reverse high
2546   __ bswapl(reg_high);
2547   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2548   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2549   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2550 }
2551 
2552 static void CreateBitCountLocations(
2553     ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2554   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2555     // Do nothing if there is no popcnt support. This results in generating
2556     // a call for the intrinsic rather than direct code.
2557     return;
2558   }
2559   LocationSummary* locations = new (arena) LocationSummary(invoke,
2560                                                            LocationSummary::kNoCall,
2561                                                            kIntrinsified);
2562   if (is_long) {
2563     locations->AddTemp(Location::RequiresRegister());
2564   }
2565   locations->SetInAt(0, Location::Any());
2566   locations->SetOut(Location::RequiresRegister());
2567 }
2568 
2569 static void GenBitCount(X86Assembler* assembler,
2570                         CodeGeneratorX86* codegen,
2571                         HInvoke* invoke, bool is_long) {
2572   LocationSummary* locations = invoke->GetLocations();
2573   Location src = locations->InAt(0);
2574   Register out = locations->Out().AsRegister<Register>();
2575 
2576   if (invoke->InputAt(0)->IsConstant()) {
2577     // Evaluate this at compile time.
2578     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2579     int32_t result = is_long
2580         ? POPCOUNT(static_cast<uint64_t>(value))
2581         : POPCOUNT(static_cast<uint32_t>(value));
2582     codegen->Load32BitValue(out, result);
2583     return;
2584   }
2585 
2586   // Handle the non-constant cases.
2587   if (!is_long) {
2588     if (src.IsRegister()) {
2589       __ popcntl(out, src.AsRegister<Register>());
2590     } else {
2591       DCHECK(src.IsStackSlot());
2592       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2593     }
2594   } else {
2595     // The 64-bit case needs to worry about two parts.
2596     Register temp = locations->GetTemp(0).AsRegister<Register>();
2597     if (src.IsRegisterPair()) {
2598       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2599       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2600     } else {
2601       DCHECK(src.IsDoubleStackSlot());
2602       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2603       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2604     }
2605     __ addl(out, temp);
2606   }
2607 }
2608 
2609 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2610   CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
2611 }
2612 
2613 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2614   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
2615 }
2616 
2617 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2618   CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
2619 }
2620 
2621 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2622   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
2623 }
2624 
2625 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2626   LocationSummary* locations = new (arena) LocationSummary(invoke,
2627                                                            LocationSummary::kNoCall,
2628                                                            kIntrinsified);
2629   if (is_long) {
2630     locations->SetInAt(0, Location::RequiresRegister());
2631   } else {
2632     locations->SetInAt(0, Location::Any());
2633   }
2634   locations->SetOut(Location::RequiresRegister());
2635 }
2636 
2637 static void GenLeadingZeros(X86Assembler* assembler,
2638                             CodeGeneratorX86* codegen,
2639                             HInvoke* invoke, bool is_long) {
2640   LocationSummary* locations = invoke->GetLocations();
2641   Location src = locations->InAt(0);
2642   Register out = locations->Out().AsRegister<Register>();
2643 
2644   if (invoke->InputAt(0)->IsConstant()) {
2645     // Evaluate this at compile time.
2646     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2647     if (value == 0) {
2648       value = is_long ? 64 : 32;
2649     } else {
2650       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2651     }
2652     codegen->Load32BitValue(out, value);
2653     return;
2654   }
2655 
2656   // Handle the non-constant cases.
2657   if (!is_long) {
2658     if (src.IsRegister()) {
2659       __ bsrl(out, src.AsRegister<Register>());
2660     } else {
2661       DCHECK(src.IsStackSlot());
2662       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2663     }
2664 
2665     // BSR sets ZF if the input was zero; in that case the output is undefined.
2666     NearLabel all_zeroes, done;
2667     __ j(kEqual, &all_zeroes);
2668 
2669     // Correct the result from BSR to get the final CLZ result.
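    // (CLZ(x) == 31 - BSR(x), and since BSR(x) <= 31, 31 - BSR(x) == BSR(x) ^ 31.)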
2670     __ xorl(out, Immediate(31));
2671     __ jmp(&done);
2672 
2673     // Fix the zero case with the expected result.
2674     __ Bind(&all_zeroes);
2675     __ movl(out, Immediate(32));
2676 
2677     __ Bind(&done);
2678     return;
2679   }
2680 
2681   // The 64-bit case needs to worry about both halves of the register pair.
2682   DCHECK(src.IsRegisterPair());
2683   Register src_lo = src.AsRegisterPairLow<Register>();
2684   Register src_hi = src.AsRegisterPairHigh<Register>();
2685   NearLabel handle_low, done, all_zeroes;
2686 
2687   // Is the high word zero?
2688   __ testl(src_hi, src_hi);
2689   __ j(kEqual, &handle_low);
2690 
2691   // High word is not zero. We know that the BSR result is defined in this case.
2692   __ bsrl(out, src_hi);
2693 
2694   // Correct the result from BSR to get the final CLZ result.
2695   __ xorl(out, Immediate(31));
2696   __ jmp(&done);
2697 
2698   // High word was zero.  We have to compute the low word count and add 32.
2699   __ Bind(&handle_low);
2700   __ bsrl(out, src_lo);
2701   __ j(kEqual, &all_zeroes);
2702 
2703   // We had a valid result.  Use an XOR to both correct the result and add 32.
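  // ((31 - BSR(src_lo)) + 32 == 63 - BSR(src_lo) == BSR(src_lo) ^ 63, as BSR(src_lo) <= 31.)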
2704   __ xorl(out, Immediate(63));
2705   __ jmp(&done);
2706 
2707   // All zero case.
2708   __ Bind(&all_zeroes);
2709   __ movl(out, Immediate(64));
2710 
2711   __ Bind(&done);
2712 }
2713 
2714 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2715   CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
2716 }
2717 
2718 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2719   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2720 }
2721 
2722 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2723   CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
2724 }
2725 
2726 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2727   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2728 }
2729 
2730 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2731   LocationSummary* locations = new (arena) LocationSummary(invoke,
2732                                                            LocationSummary::kNoCall,
2733                                                            kIntrinsified);
2734   if (is_long) {
2735     locations->SetInAt(0, Location::RequiresRegister());
2736   } else {
2737     locations->SetInAt(0, Location::Any());
2738   }
2739   locations->SetOut(Location::RequiresRegister());
2740 }
2741 
2742 static void GenTrailingZeros(X86Assembler* assembler,
2743                              CodeGeneratorX86* codegen,
2744                              HInvoke* invoke, bool is_long) {
2745   LocationSummary* locations = invoke->GetLocations();
2746   Location src = locations->InAt(0);
2747   Register out = locations->Out().AsRegister<Register>();
2748 
2749   if (invoke->InputAt(0)->IsConstant()) {
2750     // Evaluate this at compile time.
2751     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2752     if (value == 0) {
2753       value = is_long ? 64 : 32;
2754     } else {
2755       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2756     }
2757     codegen->Load32BitValue(out, value);
2758     return;
2759   }
2760 
2761   // Handle the non-constant cases.
2762   if (!is_long) {
2763     if (src.IsRegister()) {
2764       __ bsfl(out, src.AsRegister<Register>());
2765     } else {
2766       DCHECK(src.IsStackSlot());
2767       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2768     }
2769 
2770     // BSF sets ZF if the input was zero; in that case the output is undefined.
2771     NearLabel done;
2772     __ j(kNotEqual, &done);
2773 
2774     // Fix the zero case with the expected result.
2775     __ movl(out, Immediate(32));
2776 
2777     __ Bind(&done);
2778     return;
2779   }
2780 
2781   // The 64-bit case needs to worry about both halves of the register pair.
2782   DCHECK(src.IsRegisterPair());
2783   Register src_lo = src.AsRegisterPairLow<Register>();
2784   Register src_hi = src.AsRegisterPairHigh<Register>();
2785   NearLabel done, all_zeroes;
2786 
2787   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2788   __ bsfl(out, src_lo);
2789   __ j(kNotEqual, &done);
2790 
2791   // Low word was zero.  We have to compute the high word count and add 32.
2792   __ bsfl(out, src_hi);
2793   __ j(kEqual, &all_zeroes);
2794 
2795   // We had a valid result.  Add 32 to account for the low word being zero.
2796   __ addl(out, Immediate(32));
2797   __ jmp(&done);
2798 
2799   // All zero case.
2800   __ Bind(&all_zeroes);
2801   __ movl(out, Immediate(64));
2802 
2803   __ Bind(&done);
2804 }
2805 
2806 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2807   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
2808 }
2809 
2810 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2811   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2812 }
2813 
2814 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2815   CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
2816 }
2817 
2818 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2819   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2820 }
2821 
2822 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
2823   if (kEmitCompilerReadBarrier) {
2824     // Do not intrinsify this call with the read barrier configuration.
2825     return;
2826   }
2827   LocationSummary* locations = new (arena_) LocationSummary(invoke,
2828                                                             LocationSummary::kCallOnSlowPath,
2829                                                             kIntrinsified);
2830   locations->SetInAt(0, Location::RequiresRegister());
2831   locations->SetOut(Location::SameAsFirstInput());
2832   locations->AddTemp(Location::RequiresRegister());
2833 }
2834 
2835 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
2836   DCHECK(!kEmitCompilerReadBarrier);
2837   LocationSummary* locations = invoke->GetLocations();
2838   X86Assembler* assembler = GetAssembler();
2839 
2840   Register obj = locations->InAt(0).AsRegister<Register>();
2841   Register out = locations->Out().AsRegister<Register>();
2842 
2843   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
2844   codegen_->AddSlowPath(slow_path);
2845 
2846   // Load ArtMethod first.
2847   HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2848   DCHECK(invoke_direct != nullptr);
2849   Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
2850       invoke_direct, locations->GetTemp(0));
2851   DCHECK(temp_loc.Equals(locations->GetTemp(0)));
2852   Register temp = temp_loc.AsRegister<Register>();
2853 
2854   // Now get declaring class.
2855   __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
2856 
2857   uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2858   uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2859   DCHECK_NE(slow_path_flag_offset, 0u);
2860   DCHECK_NE(disable_flag_offset, 0u);
2861   DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2862 
2863   // Check the static flags that prevent us from using the intrinsic.
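  // When the two byte-sized flags are adjacent in memory, a single 16-bit compare
  // against zero reads both of them at once; otherwise test each byte separately.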
2864   if (slow_path_flag_offset == disable_flag_offset + 1) {
2865     __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
2866     __ j(kNotEqual, slow_path->GetEntryLabel());
2867   } else {
2868     __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
2869     __ j(kNotEqual, slow_path->GetEntryLabel());
2870     __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
2871     __ j(kNotEqual, slow_path->GetEntryLabel());
2872   }
2873 
2874   // Fast path.
2875   __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
2876   codegen_->MaybeRecordImplicitNullCheck(invoke);
2877   __ MaybeUnpoisonHeapReference(out);
2878   __ Bind(slow_path->GetExitLabel());
2879 }
2880 
2881 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2882   return instruction->InputAt(input0) == instruction->InputAt(input1);
2883 }
2884 
2885 // Compute base address for the System.arraycopy intrinsic in `base`.
2886 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2887                                           Primitive::Type type,
2888                                           const Register& array,
2889                                           const Location& pos,
2890                                           const Register& base) {
2891   // This routine is only used by the SystemArrayCopy intrinsic at the
2892   // moment, hence the DCHECK on Primitive::kPrimNot below. Allowing other
2893   // types (e.g. Primitive::kPrimChar) would let it also serve SystemArrayCopyChar.
2894   DCHECK_EQ(type, Primitive::kPrimNot);
2895   const int32_t element_size = Primitive::ComponentSize(type);
2896   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2897   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2898 
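  // The computed base is `array + data_offset + pos * element_size`; LEA folds
  // the scale and displacement into a single instruction.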
2899   if (pos.IsConstant()) {
2900     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2901     __ leal(base, Address(array, element_size * constant + data_offset));
2902   } else {
2903     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2904   }
2905 }
2906 
2907 // Compute end source address for the System.arraycopy intrinsic in `end`.
2908 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2909                                          Primitive::Type type,
2910                                          const Location& copy_length,
2911                                          const Register& base,
2912                                          const Register& end) {
2913   // This routine is only used by the SystemArrayCopy intrinsic at the
2914   // moment, hence the DCHECK on Primitive::kPrimNot below. Allowing other
2915   // types (e.g. Primitive::kPrimChar) would let it also serve SystemArrayCopyChar.
2916   DCHECK_EQ(type, Primitive::kPrimNot);
2917   const int32_t element_size = Primitive::ComponentSize(type);
2918   const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2919 
2920   if (copy_length.IsConstant()) {
2921     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2922     __ leal(end, Address(base, element_size * constant));
2923   } else {
2924     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2925   }
2926 }
2927 
2928 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2929   // The only read barrier implementation supporting the
2930   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2931   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2932     return;
2933   }
2934 
2935   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2936   if (invoke->GetLocations() != nullptr) {
2937     // Need a byte register for marking.
2938     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2939 
2940     static constexpr size_t kSrc = 0;
2941     static constexpr size_t kSrcPos = 1;
2942     static constexpr size_t kDest = 2;
2943     static constexpr size_t kDestPos = 3;
2944     static constexpr size_t kLength = 4;
2945 
2946     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2947         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2948         !invoke->InputAt(kLength)->IsIntConstant()) {
2949       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2950           !IsSameInput(invoke, kSrcPos, kLength) &&
2951           !IsSameInput(invoke, kDestPos, kLength) &&
2952           !IsSameInput(invoke, kSrc, kDest)) {
2953         // Not enough registers, make the length also take a stack slot.
2954         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2955       }
2956     }
2957   }
2958 }
2959 
2960 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2961   // The only read barrier implementation supporting the
2962   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2963   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2964 
2965   X86Assembler* assembler = GetAssembler();
2966   LocationSummary* locations = invoke->GetLocations();
2967 
2968   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2969   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2970   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2971   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2972   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2973 
2974   Register src = locations->InAt(0).AsRegister<Register>();
2975   Location src_pos = locations->InAt(1);
2976   Register dest = locations->InAt(2).AsRegister<Register>();
2977   Location dest_pos = locations->InAt(3);
2978   Location length_arg = locations->InAt(4);
2979   Location length = length_arg;
2980   Location temp1_loc = locations->GetTemp(0);
2981   Register temp1 = temp1_loc.AsRegister<Register>();
2982   Location temp2_loc = locations->GetTemp(1);
2983   Register temp2 = temp2_loc.AsRegister<Register>();
2984 
2985   SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
2986   codegen_->AddSlowPath(intrinsic_slow_path);
2987 
2988   NearLabel conditions_on_positions_validated;
2989   SystemArrayCopyOptimizations optimizations(invoke);
2990 
2991   // If source and destination are the same array, we go to the slow path when
2992   // the destination range starts after the source range (a forward copy would be unsafe).
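  // (E.g. arraycopy(a, 0, a, 1, n) needs a backward copy, which the fast-path
  // loop below does not implement.)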
2993   if (src_pos.IsConstant()) {
2994     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2995     if (dest_pos.IsConstant()) {
2996       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2997       if (optimizations.GetDestinationIsSource()) {
2998         // Checked when building locations.
2999         DCHECK_GE(src_pos_constant, dest_pos_constant);
3000       } else if (src_pos_constant < dest_pos_constant) {
3001         __ cmpl(src, dest);
3002         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3003       }
3004     } else {
3005       if (!optimizations.GetDestinationIsSource()) {
3006         __ cmpl(src, dest);
3007         __ j(kNotEqual, &conditions_on_positions_validated);
3008       }
3009       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3010       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3011     }
3012   } else {
3013     if (!optimizations.GetDestinationIsSource()) {
3014       __ cmpl(src, dest);
3015       __ j(kNotEqual, &conditions_on_positions_validated);
3016     }
3017     if (dest_pos.IsConstant()) {
3018       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3019       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
3020       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3021     } else {
3022       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
3023       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3024     }
3025   }
3026 
3027   __ Bind(&conditions_on_positions_validated);
3028 
3029   if (!optimizations.GetSourceIsNotNull()) {
3030     // Bail out if the source is null.
3031     __ testl(src, src);
3032     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3033   }
3034 
3035   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3036     // Bail out if the destination is null.
3037     __ testl(dest, dest);
3038     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3039   }
3040 
3041   Location temp3_loc = locations->GetTemp(2);
3042   Register temp3 = temp3_loc.AsRegister<Register>();
3043   if (length.IsStackSlot()) {
3044     __ movl(temp3, Address(ESP, length.GetStackIndex()));
3045     length = Location::RegisterLocation(temp3);
3046   }
3047 
3048   // If the length is negative, bail out.
3049   // We have already checked in the LocationsBuilder for the constant case.
3050   if (!length.IsConstant() &&
3051       !optimizations.GetCountIsSourceLength() &&
3052       !optimizations.GetCountIsDestinationLength()) {
3053     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3054     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3055   }
3056 
3057   // Validity checks: source.
3058   CheckPosition(assembler,
3059                 src_pos,
3060                 src,
3061                 length,
3062                 intrinsic_slow_path,
3063                 temp1,
3064                 optimizations.GetCountIsSourceLength());
3065 
3066   // Validity checks: dest.
3067   CheckPosition(assembler,
3068                 dest_pos,
3069                 dest,
3070                 length,
3071                 intrinsic_slow_path,
3072                 temp1,
3073                 optimizations.GetCountIsDestinationLength());
3074 
3075   if (!optimizations.GetDoesNotNeedTypeCheck()) {
3076     // Check whether all elements of the source array are assignable to the component
3077     // type of the destination array. We do two checks: the classes are the same,
3078     // or the destination is Object[]. If none of these checks succeed, we go to the
3079     // slow path.
3080 
3081     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3082       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3083         // /* HeapReference<Class> */ temp1 = src->klass_
3084         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3085             invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
3086         // Bail out if the source is not a non primitive array.
3087         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3088         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3089             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3090         __ testl(temp1, temp1);
3091         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3092         // If heap poisoning is enabled, `temp1` has been unpoisoned
3093         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3094       } else {
3095         // /* HeapReference<Class> */ temp1 = src->klass_
3096         __ movl(temp1, Address(src, class_offset));
3097         __ MaybeUnpoisonHeapReference(temp1);
3098         // Bail out if the source is not a non primitive array.
3099         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3100         __ movl(temp1, Address(temp1, component_offset));
3101         __ testl(temp1, temp1);
3102         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3103         __ MaybeUnpoisonHeapReference(temp1);
3104       }
3105       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3106       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3107     }
3108 
3109     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3110       if (length.Equals(Location::RegisterLocation(temp3))) {
3111         // When Baker read barriers are enabled, register `temp3`,
3112         // which in the present case contains the `length` parameter,
3113         // will be overwritten below.  Make the `length` location
3114         // reference the original stack location; it will be moved
3115         // back to `temp3` later if necessary.
3116         DCHECK(length_arg.IsStackSlot());
3117         length = length_arg;
3118       }
3119 
3120       // /* HeapReference<Class> */ temp1 = dest->klass_
3121       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3122           invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
3123 
3124       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3125         // Bail out if the destination is not a non primitive array.
3126         //
3127         // Register `temp1` is not trashed by the read barrier emitted
3128         // by GenerateFieldLoadWithBakerReadBarrier below, as that
3129         // method produces a call to a ReadBarrierMarkRegX entry point,
3130         // which saves all potentially live registers, including
3131         // temporaries such as `temp1`.
3132         // /* HeapReference<Class> */ temp2 = temp1->component_type_
3133         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3134             invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
3135         __ testl(temp2, temp2);
3136         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3137         // If heap poisoning is enabled, `temp2` has been unpoisoned
3138         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3139         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3140         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3141       }
3142 
3143       // For the same reason given earlier, `temp1` is not trashed by the
3144       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3145       // /* HeapReference<Class> */ temp2 = src->klass_
3146       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3147           invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
3148       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3149       __ cmpl(temp1, temp2);
3150 
3151       if (optimizations.GetDestinationIsTypedObjectArray()) {
3152         NearLabel do_copy;
3153         __ j(kEqual, &do_copy);
3154         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3155         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3156             invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3157         // We do not need to emit a read barrier for the following
3158         // heap reference load, as `temp1` is only used in a
3159         // comparison with null below, and this reference is not
3160         // kept afterwards.
3161         __ cmpl(Address(temp1, super_offset), Immediate(0));
3162         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3163         __ Bind(&do_copy);
3164       } else {
3165         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3166       }
3167     } else {
3168       // Non read barrier code.
3169 
3170       // /* HeapReference<Class> */ temp1 = dest->klass_
3171       __ movl(temp1, Address(dest, class_offset));
3172       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3173         __ MaybeUnpoisonHeapReference(temp1);
3174         // Bail out if the destination is not a non primitive array.
3175         // /* HeapReference<Class> */ temp2 = temp1->component_type_
3176         __ movl(temp2, Address(temp1, component_offset));
3177         __ testl(temp2, temp2);
3178         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3179         __ MaybeUnpoisonHeapReference(temp2);
3180         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3181         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3182         // Re-poison the heap reference to make the compare instruction below
3183         // compare two poisoned references.
3184         __ PoisonHeapReference(temp1);
3185       }
3186 
3187       // Note: if heap poisoning is on, we are comparing two poisoned references here.
3188       __ cmpl(temp1, Address(src, class_offset));
3189 
3190       if (optimizations.GetDestinationIsTypedObjectArray()) {
3191         NearLabel do_copy;
3192         __ j(kEqual, &do_copy);
3193         __ MaybeUnpoisonHeapReference(temp1);
3194         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3195         __ movl(temp1, Address(temp1, component_offset));
3196         __ MaybeUnpoisonHeapReference(temp1);
3197         __ cmpl(Address(temp1, super_offset), Immediate(0));
3198         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3199         __ Bind(&do_copy);
3200       } else {
3201         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3202       }
3203     }
3204   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3205     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3206     // Bail out if the source is not a non primitive array.
3207     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3208       // /* HeapReference<Class> */ temp1 = src->klass_
3209       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3210           invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
3211       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3212       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3213           invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3214       __ testl(temp1, temp1);
3215       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3216       // If heap poisoning is enabled, `temp1` has been unpoisoned
3217       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3218     } else {
3219       // /* HeapReference<Class> */ temp1 = src->klass_
3220       __ movl(temp1, Address(src, class_offset));
3221       __ MaybeUnpoisonHeapReference(temp1);
3222       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3223       __ movl(temp1, Address(temp1, component_offset));
3224       __ testl(temp1, temp1);
3225       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3226       __ MaybeUnpoisonHeapReference(temp1);
3227     }
3228     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3229     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3230   }
3231 
3232   const Primitive::Type type = Primitive::kPrimNot;
3233   const int32_t element_size = Primitive::ComponentSize(type);
3234 
3235   // Compute the base source address in `temp1`.
3236   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
3237 
3238   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3239     // If it is needed (in the case of the fast-path loop), the base
3240     // destination address is computed later, as `temp2` is used for
3241     // intermediate computations.
3242 
3243     // Compute the end source address in `temp3`.
3244     if (length.IsStackSlot()) {
3245       // Location `length` is again pointing at a stack slot, as
3246       // register `temp3` (which previously contained the length parameter)
3247       // has been overwritten; restore it now.
3248       DCHECK(length.Equals(length_arg));
3249       __ movl(temp3, Address(ESP, length.GetStackIndex()));
3250       length = Location::RegisterLocation(temp3);
3251     }
3252     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3253 
3254     // SystemArrayCopy implementation for Baker read barriers (see
3255     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3256     //
3257     //   if (src_ptr != end_ptr) {
3258     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3259     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3260     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
3261     //     if (is_gray) {
3262     //       // Slow-path copy.
3263     //       for (size_t i = 0; i != length; ++i) {
3264     //         dest_array[dest_pos + i] =
3265     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3266     //       }
3267     //     } else {
3268     //       // Fast-path copy.
3269     //       do {
3270     //         *dest_ptr++ = *src_ptr++;
3271     //       } while (src_ptr != end_ptr)
3272     //     }
3273     //   }
3274 
3275     NearLabel loop, done;
3276 
3277     // Don't enter copy loop if `length == 0`.
3278     __ cmpl(temp1, temp3);
3279     __ j(kEqual, &done);
3280 
3281     // Given the numeric representation, it's enough to check the low bit of the rb_state.
3282     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
3283     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3284     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3285     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3286     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3287 
3288     // if (rb_state == ReadBarrier::GrayState())
3289     //   goto slow_path;
3290     // At this point, just do the "if" and make sure that flags are preserved until the branch.
3291     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3292 
3293     // Load fence to prevent load-load reordering.
3294     // Note that this is a no-op, thanks to the x86 memory model.
3295     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3296 
3297     // Slow path used to copy array when `src` is gray.
3298     SlowPathCode* read_barrier_slow_path =
3299         new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3300     codegen_->AddSlowPath(read_barrier_slow_path);
3301 
3302     // We have done the "if" of the gray bit check above, now branch based on the flags.
3303     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3304 
3305     // Fast-path copy.
3306     // Compute the base destination address in `temp2`.
3307     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3308     // Iterate over the arrays and do a raw copy of the objects. We don't need to
3309     // poison/unpoison.
3310     __ Bind(&loop);
3311     __ pushl(Address(temp1, 0));
3312     __ cfi().AdjustCFAOffset(4);
3313     __ popl(Address(temp2, 0));
3314     __ cfi().AdjustCFAOffset(-4);
3315     __ addl(temp1, Immediate(element_size));
3316     __ addl(temp2, Immediate(element_size));
3317     __ cmpl(temp1, temp3);
3318     __ j(kNotEqual, &loop);
3319 
3320     __ Bind(read_barrier_slow_path->GetExitLabel());
3321     __ Bind(&done);
3322   } else {
3323     // Non read barrier code.
3324     // Compute the base destination address in `temp2`.
3325     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3326     // Compute the end source address in `temp3`.
3327     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3328     // Iterate over the arrays and do a raw copy of the objects. We don't need to
3329     // poison/unpoison.
3330     NearLabel loop, done;
3331     __ cmpl(temp1, temp3);
3332     __ j(kEqual, &done);
3333     __ Bind(&loop);
3334     __ pushl(Address(temp1, 0));
3335     __ cfi().AdjustCFAOffset(4);
3336     __ popl(Address(temp2, 0));
3337     __ cfi().AdjustCFAOffset(-4);
3338     __ addl(temp1, Immediate(element_size));
3339     __ addl(temp2, Immediate(element_size));
3340     __ cmpl(temp1, temp3);
3341     __ j(kNotEqual, &loop);
3342     __ Bind(&done);
3343   }
3344 
3345   // We only need one card marking on the destination array.
3346   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
3347 
3348   __ Bind(intrinsic_slow_path->GetExitLabel());
3349 }
3350 
3351 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3352   InvokeRuntimeCallingConvention calling_convention;
3353   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3354       invoke,
3355       codegen_,
3356       Location::RegisterLocation(EAX),
3357       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3358 }
3359 
3360 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3361   IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3362   LocationSummary* locations = invoke->GetLocations();
3363   X86Assembler* assembler = GetAssembler();
3364 
3365   Register out = locations->Out().AsRegister<Register>();
3366   InvokeRuntimeCallingConvention calling_convention;
3367   if (invoke->InputAt(0)->IsConstant()) {
3368     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3369     if (value >= info.low && value <= info.high) {
3370       // Just embed the j.l.Integer in the code.
3371       ScopedObjectAccess soa(Thread::Current());
3372       mirror::Object* boxed = info.cache->Get(value + (-info.low));
3373       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3374       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3375       __ movl(out, Immediate(address));
3376     } else {
3377       // Allocate and initialize a new j.l.Integer.
3378       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3379       // JIT object table.
3380       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3381       __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
3382       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3383       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3384       __ movl(Address(out, info.value_offset), Immediate(value));
3385     }
3386   } else {
3387     Register in = locations->InAt(0).AsRegister<Register>();
3388     // Check bounds of our cache.
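    // Subtracting info.low and doing a single unsigned compare covers both ends of
    // the range: values below info.low wrap around and also compare kAboveEqual.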
3389     __ leal(out, Address(in, -info.low));
3390     __ cmpl(out, Immediate(info.high - info.low + 1));
3391     NearLabel allocate, done;
3392     __ j(kAboveEqual, &allocate);
3393     // If the value is within the bounds, load the j.l.Integer directly from the array.
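    // The boxed object lives at `cache + data_offset + (value - info.low) * 4`,
    // where 4 is the heap reference size.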
3394     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3395     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3396     __ movl(out, Address(out, TIMES_4, data_offset + address));
3397     __ MaybeUnpoisonHeapReference(out);
3398     __ jmp(&done);
3399     __ Bind(&allocate);
3400     // Otherwise allocate and initialize a new j.l.Integer.
3401     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3402     __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
3403     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3404     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3405     __ movl(Address(out, info.value_offset), in);
3406     __ Bind(&done);
3407   }
3408 }
3409 
3410 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
3411 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
3412 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
3413 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
3414 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
3415 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
3416 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
3417 
3418 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
3419 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
3420 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
3421 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
3422 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
3423 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
3424 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
3425 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
3426 
3427 // 1.8.
3428 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
3429 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
3430 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
3431 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
3432 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
3433 
3434 UNREACHABLE_INTRINSICS(X86)
3435 
3436 #undef __
3437 
3438 }  // namespace x86
3439 }  // namespace art
3440