1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86.h"
18 
19 #include <limits>
20 
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "data_type-inl.h"
26 #include "entrypoints/quick/quick_entrypoints.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_utils.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/object_array-inl.h"
33 #include "mirror/reference.h"
34 #include "mirror/string.h"
35 #include "mirror/var_handle.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread-current-inl.h"
38 #include "utils/x86/assembler_x86.h"
39 #include "utils/x86/constants_x86.h"
40 
41 namespace art HIDDEN {
42 
43 namespace x86 {
44 
45 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
46   : allocator_(codegen->GetGraph()->GetAllocator()),
47     codegen_(codegen) {
48 }
49 
50 
51 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
52   return down_cast<X86Assembler*>(codegen_->GetAssembler());
53 }
54 
55 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
56   return codegen_->GetGraph()->GetAllocator();
57 }
58 
59 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
60   Dispatch(invoke);
61   LocationSummary* res = invoke->GetLocations();
62   if (res == nullptr) {
63     return false;
64   }
65   return res->Intrinsified();
66 }
67 
68 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
69 
70 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
71 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
72 
73 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
74 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
75  public:
76   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
77       : SlowPathCode(instruction) {
78     DCHECK(gUseReadBarrier);
79     DCHECK(kUseBakerReadBarrier);
80   }
81 
82   void EmitNativeCode(CodeGenerator* codegen) override {
83     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
84     LocationSummary* locations = instruction_->GetLocations();
85     DCHECK(locations->CanCall());
86     DCHECK(instruction_->IsInvokeStaticOrDirect())
87         << "Unexpected instruction in read barrier arraycopy slow path: "
88         << instruction_->DebugName();
89     DCHECK(instruction_->GetLocations()->Intrinsified());
90     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
91 
92     int32_t element_size = DataType::Size(DataType::Type::kReference);
93     uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
94 
95     Register src = locations->InAt(0).AsRegister<Register>();
96     Location src_pos = locations->InAt(1);
97     Register dest = locations->InAt(2).AsRegister<Register>();
98     Location dest_pos = locations->InAt(3);
99     Location length = locations->InAt(4);
100     Location temp1_loc = locations->GetTemp(0);
101     Register temp1 = temp1_loc.AsRegister<Register>();
102     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
103     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
104 
105     __ Bind(GetEntryLabel());
106     // In this code path, registers `temp1`, `temp2`, and `temp3`
107     // (resp.) are not used for the base source address, the base
108     // destination address, and the end source address (resp.), as in
109     // other SystemArrayCopy intrinsic code paths.  Instead they are
110     // (resp.) used for:
111     // - the loop index (`i`);
112     // - the source index (`src_index`) and the loaded (source)
113     //   reference (`value`); and
114     // - the destination index (`dest_index`).
115 
116     // i = 0
117     __ xorl(temp1, temp1);
118     NearLabel loop;
119     __ Bind(&loop);
120     // value = src_array[i + src_pos]
121     if (src_pos.IsConstant()) {
122       int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
123       int32_t adjusted_offset = offset + constant * element_size;
124       __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
125     } else {
126       __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
127       __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
128     }
129     __ MaybeUnpoisonHeapReference(temp2);
130     // TODO: Inline the mark bit check before calling the runtime?
131     // value = ReadBarrier::Mark(value)
132     // No need to save live registers; it's taken care of by the
133     // entrypoint. Also, there is no need to update the stack mask,
134     // as this runtime call will not trigger a garbage collection.
135     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
136     // explanations.)
137     DCHECK_NE(temp2, ESP);
138     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
139     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
140     // This runtime call does not require a stack map.
141     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
142     __ MaybePoisonHeapReference(temp2);
143     // dest_array[i + dest_pos] = value
144     if (dest_pos.IsConstant()) {
145       int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
146       int32_t adjusted_offset = offset + constant * element_size;
147       __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
148     } else {
149       __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
150       __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
151     }
152     // ++i
153     __ addl(temp1, Immediate(1));
154     // if (i != length) goto loop
155     x86_codegen->GenerateIntCompare(temp1_loc, length);
156     __ j(kNotEqual, &loop);
157     __ jmp(GetExitLabel());
158   }
159 
160   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
161 
162  private:
163   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
164 };
165 
166 #undef __
167 
168 #define __ assembler->
169 
170 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
171   LocationSummary* locations =
172       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
173   locations->SetInAt(0, Location::RequiresFpuRegister());
174   locations->SetOut(Location::RequiresRegister());
175   if (is64bit) {
176     locations->AddTemp(Location::RequiresFpuRegister());
177   }
178 }
179 
180 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
181   LocationSummary* locations =
182       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
183   locations->SetInAt(0, Location::RequiresRegister());
184   locations->SetOut(Location::RequiresFpuRegister());
185   if (is64bit) {
186     locations->AddTemp(Location::RequiresFpuRegister());
187     locations->AddTemp(Location::RequiresFpuRegister());
188   }
189 }
190 
191 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
192   Location input = locations->InAt(0);
193   Location output = locations->Out();
194   if (is64bit) {
195     // Need to use the temporary.
196     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
197     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
198     __ movd(output.AsRegisterPairLow<Register>(), temp);
199     __ psrlq(temp, Immediate(32));
200     __ movd(output.AsRegisterPairHigh<Register>(), temp);
201   } else {
202     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
203   }
204 }
205 
206 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
207   Location input = locations->InAt(0);
208   Location output = locations->Out();
209   if (is64bit) {
210     // Need to use the temporary.
211     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
212     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
213     __ movd(temp1, input.AsRegisterPairLow<Register>());
214     __ movd(temp2, input.AsRegisterPairHigh<Register>());
215     __ punpckldq(temp1, temp2);
216     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
217   } else {
218     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
219   }
220 }
221 
222 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
223   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
224 }
225 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
226   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
227 }
228 
229 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
230   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
231 }
232 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
233   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
234 }
235 
236 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
237   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
238 }
239 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
240   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
241 }
242 
243 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
244   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
245 }
246 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
247   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
248 }
249 
250 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
251   LocationSummary* locations =
252       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
253   locations->SetInAt(0, Location::RequiresRegister());
254   locations->SetOut(Location::SameAsFirstInput());
255 }
256 
257 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
258   LocationSummary* locations =
259       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
260   locations->SetInAt(0, Location::RequiresRegister());
261   locations->SetOut(Location::RequiresRegister());
262 }
263 
264 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
265   LocationSummary* locations =
266       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
267   locations->SetInAt(0, Location::RequiresRegister());
268   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
269 }
270 
271 static void GenReverseBytes(LocationSummary* locations,
272                             DataType::Type size,
273                             X86Assembler* assembler) {
274   Register out = locations->Out().AsRegister<Register>();
275 
276   switch (size) {
277     case DataType::Type::kInt16:
278       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
279       __ bswapl(out);
280       __ sarl(out, Immediate(16));
281       break;
282     case DataType::Type::kInt32:
283       __ bswapl(out);
284       break;
285     default:
286       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
287       UNREACHABLE();
288   }
289 }
290 
291 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
292   CreateIntToIntLocations(allocator_, invoke);
293 }
294 
295 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
296   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
297 }
298 
299 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
300   CreateLongToLongLocations(allocator_, invoke);
301 }
302 
303 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
304   LocationSummary* locations = invoke->GetLocations();
305   Location input = locations->InAt(0);
306   Register input_lo = input.AsRegisterPairLow<Register>();
307   Register input_hi = input.AsRegisterPairHigh<Register>();
308   Location output = locations->Out();
309   Register output_lo = output.AsRegisterPairLow<Register>();
310   Register output_hi = output.AsRegisterPairHigh<Register>();
311 
312   X86Assembler* assembler = GetAssembler();
313   // Assign the inputs to the outputs, mixing low/high.
314   __ movl(output_lo, input_hi);
315   __ movl(output_hi, input_lo);
316   __ bswapl(output_lo);
317   __ bswapl(output_hi);
318 }
319 
320 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
321   CreateIntToIntLocations(allocator_, invoke);
322 }
323 
324 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
325   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
326 }
327 
328 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
329   LocationSummary* locations =
330       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
331   locations->SetInAt(0, Location::RequiresFpuRegister());
332   locations->SetOut(Location::RequiresFpuRegister());
333 }
334 
335 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
336   CreateFPToFPLocations(allocator_, invoke);
337 }
338 
339 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
340   LocationSummary* locations = invoke->GetLocations();
341   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
342   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
343 
344   GetAssembler()->sqrtsd(out, in);
345 }
346 
347 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
348                                        HInvoke* invoke,
349                                        CodeGeneratorX86* codegen) {
350   // Do we have instruction support?
351   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
352     return;
353   }
354 
355   CreateFPToFPLocations(allocator, invoke);
356 }
357 
358 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
359   LocationSummary* locations = invoke->GetLocations();
360   DCHECK(!locations->WillCall());
361   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
362   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
363   __ roundsd(out, in, Immediate(round_mode));
364 }
365 
366 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
367   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
368 }
369 
370 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
371   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
372 }
373 
374 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
375   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
376 }
377 
378 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
379   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
380 }
381 
382 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
383   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
384 }
385 
386 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
387   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
388 }
389 
390 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
391   // Do we have instruction support?
392   if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
393     return;
394   }
395 
396   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
397   DCHECK(static_or_direct != nullptr);
398   LocationSummary* locations =
399       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
400   locations->SetInAt(0, Location::RequiresFpuRegister());
401   if (static_or_direct->HasSpecialInput() &&
402       invoke->InputAt(
403           static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
404     locations->SetInAt(1, Location::RequiresRegister());
405   }
406   locations->SetOut(Location::RequiresRegister());
407   locations->AddTemp(Location::RequiresFpuRegister());
408   locations->AddTemp(Location::RequiresFpuRegister());
409 }
410 
411 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
412   LocationSummary* locations = invoke->GetLocations();
413   DCHECK(!locations->WillCall());
414 
415   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
416   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
417   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
418   Register out = locations->Out().AsRegister<Register>();
419   NearLabel skip_incr, done;
420   X86Assembler* assembler = GetAssembler();
421 
422   // Since no direct x86 rounding instruction matches the required semantics,
423   // this intrinsic is implemented as follows:
424   //  result = floor(in);
425   //  if (in - result >= 0.5f)
426   //    result = result + 1.0f;
427   __ movss(t2, in);
428   __ roundss(t1, in, Immediate(1));
429   __ subss(t2, t1);
430   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
431     // Direct constant area available.
432     HX86ComputeBaseMethodAddress* method_address =
433         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
434     Register constant_area = locations->InAt(1).AsRegister<Register>();
435     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
436                                                 method_address,
437                                                 constant_area));
438     __ j(kBelow, &skip_incr);
439     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
440                                                method_address,
441                                                constant_area));
442     __ Bind(&skip_incr);
443   } else {
444     // No constant area: go through stack.
445     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
446     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
447     __ comiss(t2, Address(ESP, 4));
448     __ j(kBelow, &skip_incr);
449     __ addss(t1, Address(ESP, 0));
450     __ Bind(&skip_incr);
451     __ addl(ESP, Immediate(8));
452   }
453 
454   // Final conversion to an integer. Unfortunately this also does not have a
455   // direct x86 instruction, since NaN should map to 0 and large positive
456   // values need to be clipped to the extreme value.
457   __ movl(out, Immediate(kPrimIntMax));
458   __ cvtsi2ss(t2, out);
459   __ comiss(t1, t2);
460   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
461   __ movl(out, Immediate(0));  // does not change flags
462   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
463   __ cvttss2si(out, t1);
464   __ Bind(&done);
465 }
466 
467 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
468   LocationSummary* locations =
469       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
470   InvokeRuntimeCallingConvention calling_convention;
471   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
472   locations->SetOut(Location::FpuRegisterLocation(XMM0));
473 }
474 
475 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
476   LocationSummary* locations = invoke->GetLocations();
477   DCHECK(locations->WillCall());
478   DCHECK(invoke->IsInvokeStaticOrDirect());
479   X86Assembler* assembler = codegen->GetAssembler();
480 
481   // We need some place to pass the parameters.
482   __ subl(ESP, Immediate(16));
483   __ cfi().AdjustCFAOffset(16);
484 
485   // Pass the parameters at the bottom of the stack.
486   __ movsd(Address(ESP, 0), XMM0);
487 
488   // If we have a second parameter, pass it next.
489   if (invoke->GetNumberOfArguments() == 2) {
490     __ movsd(Address(ESP, 8), XMM1);
491   }
492 
493   // Now do the actual call.
494   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
495 
496   // Extract the return value from the FP stack.
497   __ fstpl(Address(ESP, 0));
498   __ movsd(XMM0, Address(ESP, 0));
499 
500   // And clean up the stack.
501   __ addl(ESP, Immediate(16));
502   __ cfi().AdjustCFAOffset(-16);
503 }
504 
505 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
506   LocationSummary* locations =
507       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
508   if (is_long) {
509     locations->SetInAt(0, Location::RequiresRegister());
510   } else {
511     locations->SetInAt(0, Location::Any());
512   }
513   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
514 }
515 
516 static void GenLowestOneBit(X86Assembler* assembler,
517                       CodeGeneratorX86* codegen,
518                       bool is_long,
519                       HInvoke* invoke) {
520   LocationSummary* locations = invoke->GetLocations();
521   Location src = locations->InAt(0);
522   Location out_loc = locations->Out();
523 
524   if (invoke->InputAt(0)->IsConstant()) {
525     // Evaluate this at compile time.
526     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
527     if (value == 0) {
528       if (is_long) {
529         __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
530         __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
531       } else {
532         __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
533       }
534       return;
535     }
536     // Nonzero value.
537     value = is_long ? CTZ(static_cast<uint64_t>(value))
538                     : CTZ(static_cast<uint32_t>(value));
539     if (is_long) {
540       if (value >= 32) {
541         int shift = value-32;
542         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
543         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
544       } else {
545         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
546         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
547       }
548     } else {
549       codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
550     }
551     return;
552   }
553   // Handle non constant case
554   if (is_long) {
555     DCHECK(src.IsRegisterPair());
556     Register src_lo = src.AsRegisterPairLow<Register>();
557     Register src_hi = src.AsRegisterPairHigh<Register>();
558 
559     Register out_lo = out_loc.AsRegisterPairLow<Register>();
560     Register out_hi = out_loc.AsRegisterPairHigh<Register>();
561 
562     __ movl(out_lo, src_lo);
563     __ movl(out_hi, src_hi);
564 
565     __ negl(out_lo);
566     __ adcl(out_hi, Immediate(0));
567     __ negl(out_hi);
568 
569     __ andl(out_lo, src_lo);
570     __ andl(out_hi, src_hi);
571   } else {
572     if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
573       Register out = out_loc.AsRegister<Register>();
574       __ blsi(out, src.AsRegister<Register>());
575     } else {
576       Register out = out_loc.AsRegister<Register>();
577       // Do tmp & -tmp
578       if (src.IsRegister()) {
579         __ movl(out, src.AsRegister<Register>());
580       } else {
581         DCHECK(src.IsStackSlot());
582         __ movl(out, Address(ESP, src.GetStackIndex()));
583       }
584       __ negl(out);
585 
586       if (src.IsRegister()) {
587         __ andl(out, src.AsRegister<Register>());
588       } else {
589         __ andl(out, Address(ESP, src.GetStackIndex()));
590       }
591     }
592   }
593 }
594 
595 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
596   CreateFPToFPCallLocations(allocator_, invoke);
597 }
598 
599 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
600   GenFPToFPCall(invoke, codegen_, kQuickCos);
601 }
602 
603 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
604   CreateFPToFPCallLocations(allocator_, invoke);
605 }
606 
607 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
608   GenFPToFPCall(invoke, codegen_, kQuickSin);
609 }
610 
611 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
612   CreateFPToFPCallLocations(allocator_, invoke);
613 }
614 
615 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
616   GenFPToFPCall(invoke, codegen_, kQuickAcos);
617 }
618 
619 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
620   CreateFPToFPCallLocations(allocator_, invoke);
621 }
622 
623 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
624   GenFPToFPCall(invoke, codegen_, kQuickAsin);
625 }
626 
627 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
628   CreateFPToFPCallLocations(allocator_, invoke);
629 }
630 
631 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
632   GenFPToFPCall(invoke, codegen_, kQuickAtan);
633 }
634 
635 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
636   CreateFPToFPCallLocations(allocator_, invoke);
637 }
638 
639 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
640   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
641 }
642 
643 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
644   CreateFPToFPCallLocations(allocator_, invoke);
645 }
646 
647 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
648   GenFPToFPCall(invoke, codegen_, kQuickCosh);
649 }
650 
651 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
652   CreateFPToFPCallLocations(allocator_, invoke);
653 }
654 
655 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
656   GenFPToFPCall(invoke, codegen_, kQuickExp);
657 }
658 
659 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
660   CreateFPToFPCallLocations(allocator_, invoke);
661 }
662 
663 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
664   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
665 }
666 
667 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
668   CreateFPToFPCallLocations(allocator_, invoke);
669 }
670 
671 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
672   GenFPToFPCall(invoke, codegen_, kQuickLog);
673 }
674 
675 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
676   CreateFPToFPCallLocations(allocator_, invoke);
677 }
678 
679 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
680   GenFPToFPCall(invoke, codegen_, kQuickLog10);
681 }
682 
683 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
684   CreateFPToFPCallLocations(allocator_, invoke);
685 }
686 
687 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
688   GenFPToFPCall(invoke, codegen_, kQuickSinh);
689 }
690 
691 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
692   CreateFPToFPCallLocations(allocator_, invoke);
693 }
694 
695 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
696   GenFPToFPCall(invoke, codegen_, kQuickTan);
697 }
698 
699 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
700   CreateFPToFPCallLocations(allocator_, invoke);
701 }
702 
703 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
704   GenFPToFPCall(invoke, codegen_, kQuickTanh);
705 }
706 
707 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
708   CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
709 }
710 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
711   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
712 }
713 
714 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
715   CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
716 }
717 
718 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
719   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
720 }
721 
722 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
723   LocationSummary* locations =
724       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
725   InvokeRuntimeCallingConvention calling_convention;
726   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
727   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
728   locations->SetOut(Location::FpuRegisterLocation(XMM0));
729 }
730 
731 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
732   DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
733   LocationSummary* locations =
734       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
735   InvokeRuntimeCallingConvention calling_convention;
736   locations->SetInAt(0, Location::RequiresFpuRegister());
737   locations->SetInAt(1, Location::RequiresFpuRegister());
738   locations->SetInAt(2, Location::RequiresFpuRegister());
739   locations->SetOut(Location::SameAsFirstInput());
740 }
741 
742 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
743   CreateFPFPToFPCallLocations(allocator_, invoke);
744 }
745 
746 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
747   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
748 }
749 
750 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
751   CreateFPFPToFPCallLocations(allocator_, invoke);
752 }
753 
754 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
755   GenFPToFPCall(invoke, codegen_, kQuickPow);
756 }
757 
758 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
759   CreateFPFPToFPCallLocations(allocator_, invoke);
760 }
761 
762 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
763   GenFPToFPCall(invoke, codegen_, kQuickHypot);
764 }
765 
766 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
767   CreateFPFPToFPCallLocations(allocator_, invoke);
768 }
769 
770 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
771   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
772 }
773 
774 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
775   // We need at least two of the positions or length to be an integer constant,
776   // or else we won't have enough free registers.
777   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
778   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
779   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
780 
781   int num_constants =
782       ((src_pos != nullptr) ? 1 : 0)
783       + ((dest_pos != nullptr) ? 1 : 0)
784       + ((length != nullptr) ? 1 : 0);
785 
786   if (num_constants < 2) {
787     // Not enough free registers.
788     return;
789   }
790 
791   // As long as we are checking, we might as well check to see if the src and dest
792   // positions are >= 0.
793   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
794       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
795     // We will have to fail anyways.
796     return;
797   }
798 
799   // And since we are already checking, check the length too.
800   if (length != nullptr) {
801     int32_t len = length->GetValue();
802     if (len < 0) {
803       // Just call as normal.
804       return;
805     }
806   }
807 
808   // Okay, it is safe to generate inline code.
809   LocationSummary* locations =
810       new (invoke->GetBlock()->GetGraph()->GetAllocator())
811       LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
812   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
813   locations->SetInAt(0, Location::RequiresRegister());
814   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
815   locations->SetInAt(2, Location::RequiresRegister());
816   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
817   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
818 
819   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
820   locations->AddTemp(Location::RegisterLocation(ESI));
821   locations->AddTemp(Location::RegisterLocation(EDI));
822   locations->AddTemp(Location::RegisterLocation(ECX));
823 }
824 
825 static void CheckPosition(X86Assembler* assembler,
826                           Location pos,
827                           Register input,
828                           Location length,
829                           SlowPathCode* slow_path,
830                           Register temp,
831                           bool length_is_input_length = false) {
832   // Where is the length in the Array?
833   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
834 
835   if (pos.IsConstant()) {
836     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
837     if (pos_const == 0) {
838       if (!length_is_input_length) {
839         // Check that length(input) >= length.
840         if (length.IsConstant()) {
841           __ cmpl(Address(input, length_offset),
842                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843         } else {
844           __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
845         }
846         __ j(kLess, slow_path->GetEntryLabel());
847       }
848     } else {
849       // Check that length(input) >= pos.
850       __ movl(temp, Address(input, length_offset));
851       __ subl(temp, Immediate(pos_const));
852       __ j(kLess, slow_path->GetEntryLabel());
853 
854       // Check that (length(input) - pos) >= length.
855       if (length.IsConstant()) {
856         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
857       } else {
858         __ cmpl(temp, length.AsRegister<Register>());
859       }
860       __ j(kLess, slow_path->GetEntryLabel());
861     }
862   } else if (length_is_input_length) {
863     // The only way the copy can succeed is if pos is zero.
864     Register pos_reg = pos.AsRegister<Register>();
865     __ testl(pos_reg, pos_reg);
866     __ j(kNotEqual, slow_path->GetEntryLabel());
867   } else {
868     // Check that pos >= 0.
869     Register pos_reg = pos.AsRegister<Register>();
870     __ testl(pos_reg, pos_reg);
871     __ j(kLess, slow_path->GetEntryLabel());
872 
873     // Check that pos <= length(input).
874     __ cmpl(Address(input, length_offset), pos_reg);
875     __ j(kLess, slow_path->GetEntryLabel());
876 
877     // Check that (length(input) - pos) >= length.
878     __ movl(temp, Address(input, length_offset));
879     __ subl(temp, pos_reg);
880     if (length.IsConstant()) {
881       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
882     } else {
883       __ cmpl(temp, length.AsRegister<Register>());
884     }
885     __ j(kLess, slow_path->GetEntryLabel());
886   }
887 }
888 
889 static void SystemArrayCopyPrimitive(HInvoke* invoke,
890                                      X86Assembler* assembler,
891                                      CodeGeneratorX86* codegen,
892                                      DataType::Type type) {
893   LocationSummary* locations = invoke->GetLocations();
894   Register src = locations->InAt(0).AsRegister<Register>();
895   Location src_pos = locations->InAt(1);
896   Register dest = locations->InAt(2).AsRegister<Register>();
897   Location dest_pos = locations->InAt(3);
898   Location length = locations->InAt(4);
899 
900   // Temporaries that we need for MOVSB/W/L.
901   Register src_base = locations->GetTemp(0).AsRegister<Register>();
902   DCHECK_EQ(src_base, ESI);
903   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
904   DCHECK_EQ(dest_base, EDI);
905   Register count = locations->GetTemp(2).AsRegister<Register>();
906   DCHECK_EQ(count, ECX);
907 
908   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
909   codegen->AddSlowPath(slow_path);
910 
911   // Bail out if the source and destination are the same (to handle overlap).
912   __ cmpl(src, dest);
913   __ j(kEqual, slow_path->GetEntryLabel());
914 
915   // Bail out if the source is null.
916   __ testl(src, src);
917   __ j(kEqual, slow_path->GetEntryLabel());
918 
919   // Bail out if the destination is null.
920   __ testl(dest, dest);
921   __ j(kEqual, slow_path->GetEntryLabel());
922 
923   // If the length is negative, bail out.
924   // We have already checked in the LocationsBuilder for the constant case.
925   if (!length.IsConstant()) {
926     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
927     __ j(kLess, slow_path->GetEntryLabel());
928   }
929 
930   // We need the count in ECX.
931   if (length.IsConstant()) {
932     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
933   } else {
934     __ movl(count, length.AsRegister<Register>());
935   }
936 
937   // Validity checks: source. Use src_base as a temporary register.
938   CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path, src_base);
939 
940   // Validity checks: dest. Use src_base as a temporary register.
941   CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path, src_base);
942 
943   // Okay, everything checks out.  Finally time to do the copy.
944   // Check assumption that sizeof(Char) is 2 (used in scaling below).
945   const size_t data_size = DataType::Size(type);
946   const ScaleFactor scale_factor = CodeGenerator::ScaleFactorForType(type);
947   const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
948 
949   if (src_pos.IsConstant()) {
950     int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
951     __ leal(src_base, Address(src, data_size * src_pos_const + data_offset));
952   } else {
953     __ leal(src_base, Address(src, src_pos.AsRegister<Register>(), scale_factor, data_offset));
954   }
955   if (dest_pos.IsConstant()) {
956     int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
957     __ leal(dest_base, Address(dest, data_size * dest_pos_const + data_offset));
958   } else {
959     __ leal(dest_base, Address(dest, dest_pos.AsRegister<Register>(), scale_factor, data_offset));
960   }
961 
962   // Do the move.
963   switch (type) {
964     case DataType::Type::kInt8:
965        __ rep_movsb();
966        break;
967     case DataType::Type::kUint16:
968        __ rep_movsw();
969        break;
970     case DataType::Type::kInt32:
971        __ rep_movsl();
972        break;
973     default:
974        LOG(FATAL) << "Unexpected data type for intrinsic";
975   }
976   __ Bind(slow_path->GetExitLabel());
977 }
978 
979 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
980   CreateSystemArrayCopyLocations(invoke);
981 }
982 
983 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
984   X86Assembler* assembler = GetAssembler();
985   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
986 }
987 
988 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
989   X86Assembler* assembler = GetAssembler();
990   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
991 }
992 
993 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
994   CreateSystemArrayCopyLocations(invoke);
995 }
996 
997 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
998   X86Assembler* assembler = GetAssembler();
999   SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
1000 }
1001 
1002 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
1003   CreateSystemArrayCopyLocations(invoke);
1004 }
1005 
1006 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1007   // The inputs plus one temp.
1008   LocationSummary* locations = new (allocator_) LocationSummary(
1009       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1010   InvokeRuntimeCallingConvention calling_convention;
1011   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1012   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1013   locations->SetOut(Location::RegisterLocation(EAX));
1014 }
1015 
1016 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1017   X86Assembler* assembler = GetAssembler();
1018   LocationSummary* locations = invoke->GetLocations();
1019 
1020   // Note that the null check must have been done earlier.
1021   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1022 
1023   Register argument = locations->InAt(1).AsRegister<Register>();
1024   __ testl(argument, argument);
1025   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1026   codegen_->AddSlowPath(slow_path);
1027   __ j(kEqual, slow_path->GetEntryLabel());
1028 
1029   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1030   __ Bind(slow_path->GetExitLabel());
1031 }
1032 
1033 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1034   LocationSummary* locations =
1035       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1036   locations->SetInAt(0, Location::RequiresRegister());
1037   locations->SetInAt(1, Location::RequiresRegister());
1038 
1039   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1040   locations->AddTemp(Location::RegisterLocation(ECX));
1041   locations->AddTemp(Location::RegisterLocation(EDI));
1042 
1043   // Set output, ESI needed for repe_cmpsl instruction anyways.
1044   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1045 }
1046 
1047 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1048   X86Assembler* assembler = GetAssembler();
1049   LocationSummary* locations = invoke->GetLocations();
1050 
1051   Register str = locations->InAt(0).AsRegister<Register>();
1052   Register arg = locations->InAt(1).AsRegister<Register>();
1053   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1054   Register edi = locations->GetTemp(1).AsRegister<Register>();
1055   Register esi = locations->Out().AsRegister<Register>();
1056 
1057   NearLabel end, return_true, return_false;
1058 
1059   // Get offsets of count, value, and class fields within a string object.
1060   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1061   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1062   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1063 
1064   // Note that the null check must have been done earlier.
1065   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1066 
1067   StringEqualsOptimizations optimizations(invoke);
1068   if (!optimizations.GetArgumentNotNull()) {
1069     // Check if input is null, return false if it is.
1070     __ testl(arg, arg);
1071     __ j(kEqual, &return_false);
1072   }
1073 
1074   if (!optimizations.GetArgumentIsString()) {
1075     // Instanceof check for the argument by comparing class fields.
1076     // All string objects must have the same type since String cannot be subclassed.
1077     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1078     // If the argument is a string object, its class field must be equal to receiver's class field.
1079     //
1080     // As the String class is expected to be non-movable, we can read the class
1081     // field from String.equals' arguments without read barriers.
1082     AssertNonMovableStringClass();
1083     // Also, because we use the loaded class references only to compare them, we
1084     // don't need to unpoison them.
1085     // /* HeapReference<Class> */ ecx = str->klass_
1086     __ movl(ecx, Address(str, class_offset));
1087     // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1088     __ cmpl(ecx, Address(arg, class_offset));
1089     __ j(kNotEqual, &return_false);
1090   }
1091 
1092   // Reference equality check, return true if same reference.
1093   __ cmpl(str, arg);
1094   __ j(kEqual, &return_true);
1095 
1096   // Load length and compression flag of receiver string.
1097   __ movl(ecx, Address(str, count_offset));
1098   // Check if lengths and compression flags are equal, return false if they're not.
1099   // Two identical strings will always have same compression style since
1100   // compression style is decided on alloc.
1101   __ cmpl(ecx, Address(arg, count_offset));
1102   __ j(kNotEqual, &return_false);
1103   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1104   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1105                 "Expecting 0=compressed, 1=uncompressed");
1106   __ jecxz(&return_true);
1107 
1108   if (mirror::kUseStringCompression) {
1109     NearLabel string_uncompressed;
1110     // Extract length and differentiate between both compressed or both uncompressed.
1111     // Different compression style is cut above.
1112     __ shrl(ecx, Immediate(1));
1113     __ j(kCarrySet, &string_uncompressed);
1114     // Divide string length by 2, rounding up, and continue as if uncompressed.
1115     __ addl(ecx, Immediate(1));
1116     __ shrl(ecx, Immediate(1));
1117     __ Bind(&string_uncompressed);
1118   }
1119   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1120   __ leal(esi, Address(str, value_offset));
1121   __ leal(edi, Address(arg, value_offset));
1122 
1123   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1124   // divisible by 2.
1125   __ addl(ecx, Immediate(1));
1126   __ shrl(ecx, Immediate(1));
1127 
1128   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1129   // or 4 characters (compressed) at a time.
1130   DCHECK_ALIGNED(value_offset, 4);
1131   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1132 
1133   // Loop to compare strings two characters at a time starting at the beginning of the string.
1134   __ repe_cmpsl();
1135   // If strings are not equal, zero flag will be cleared.
1136   __ j(kNotEqual, &return_false);
1137 
1138   // Return true and exit the function.
1139   // If loop does not result in returning false, we return true.
1140   __ Bind(&return_true);
1141   __ movl(esi, Immediate(1));
1142   __ jmp(&end);
1143 
1144   // Return false and exit the function.
1145   __ Bind(&return_false);
1146   __ xorl(esi, esi);
1147   __ Bind(&end);
1148 }
1149 
1150 static void CreateStringIndexOfLocations(HInvoke* invoke,
1151                                          ArenaAllocator* allocator,
1152                                          bool start_at_zero) {
1153   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1154                                                                LocationSummary::kCallOnSlowPath,
1155                                                                kIntrinsified);
1156   // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1157   locations->SetInAt(0, Location::RegisterLocation(EDI));
1158   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1159   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1160   // of the instruction explicitly.
1161   // Note: This works as we don't clobber EAX anywhere.
1162   locations->SetInAt(1, Location::RegisterLocation(EAX));
1163   if (!start_at_zero) {
1164     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1165   }
1166   // As we clobber EDI during execution anyways, also use it as the output.
1167   locations->SetOut(Location::SameAsFirstInput());
1168 
1169   // repne scasw uses ECX as the counter.
1170   locations->AddTemp(Location::RegisterLocation(ECX));
1171   // Need another temporary to be able to compute the result.
1172   locations->AddTemp(Location::RequiresRegister());
1173   if (mirror::kUseStringCompression) {
1174     // Need another temporary to be able to save unflagged string length.
1175     locations->AddTemp(Location::RequiresRegister());
1176   }
1177 }
1178 
1179 static void GenerateStringIndexOf(HInvoke* invoke,
1180                                   X86Assembler* assembler,
1181                                   CodeGeneratorX86* codegen,
1182                                   bool start_at_zero) {
1183   LocationSummary* locations = invoke->GetLocations();
1184 
1185   // Note that the null check must have been done earlier.
1186   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1187 
1188   Register string_obj = locations->InAt(0).AsRegister<Register>();
1189   Register search_value = locations->InAt(1).AsRegister<Register>();
1190   Register counter = locations->GetTemp(0).AsRegister<Register>();
1191   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1192   Register out = locations->Out().AsRegister<Register>();
1193   // Only used when string compression feature is on.
1194   Register string_length_flagged;
1195 
1196   // Check our assumptions for registers.
1197   DCHECK_EQ(string_obj, EDI);
1198   DCHECK_EQ(search_value, EAX);
1199   DCHECK_EQ(counter, ECX);
1200   DCHECK_EQ(out, EDI);
1201 
1202   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1203   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1204   SlowPathCode* slow_path = nullptr;
1205   HInstruction* code_point = invoke->InputAt(1);
1206   if (code_point->IsIntConstant()) {
1207     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1208     std::numeric_limits<uint16_t>::max()) {
1209       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1210       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1211       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1212       codegen->AddSlowPath(slow_path);
1213       __ jmp(slow_path->GetEntryLabel());
1214       __ Bind(slow_path->GetExitLabel());
1215       return;
1216     }
1217   } else if (code_point->GetType() != DataType::Type::kUint16) {
1218     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1219     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1220     codegen->AddSlowPath(slow_path);
1221     __ j(kAbove, slow_path->GetEntryLabel());
1222   }
1223 
1224   // From here down, we know that we are looking for a char that fits in 16 bits.
1225   // Location of reference to data array within the String object.
1226   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1227   // Location of count within the String object.
1228   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1229 
1230   // Load the count field of the string containing the length and compression flag.
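  // With compression enabled, the count field packs the length and the flag together: bit 0 is
  // the compression flag (0 = compressed) and the upper bits hold the character count.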
1231   __ movl(string_length, Address(string_obj, count_offset));
1232 
1233   // Do a zero-length check. Even with string compression `count == 0` means empty.
1234   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1235                 "Expecting 0=compressed, 1=uncompressed");
1236   // TODO: Support jecxz.
1237   NearLabel not_found_label;
1238   __ testl(string_length, string_length);
1239   __ j(kEqual, &not_found_label);
1240 
1241   if (mirror::kUseStringCompression) {
1242     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1243     __ movl(string_length_flagged, string_length);
1244     // Extract the length and shift out the least significant bit used as compression flag.
1245     __ shrl(string_length, Immediate(1));
1246   }
1247 
1248   if (start_at_zero) {
1249     // Number of chars to scan is the same as the string length.
1250     __ movl(counter, string_length);
1251 
1252     // Move to the start of the string.
1253     __ addl(string_obj, Immediate(value_offset));
1254   } else {
1255     Register start_index = locations->InAt(2).AsRegister<Register>();
1256 
1257     // Do a start_index check.
1258     __ cmpl(start_index, string_length);
1259     __ j(kGreaterEqual, &not_found_label);
1260 
1261     // Ensure we have a start index >= 0.
1262     __ xorl(counter, counter);
1263     __ cmpl(start_index, Immediate(0));
1264     __ cmovl(kGreater, counter, start_index);
1265 
1266     if (mirror::kUseStringCompression) {
1267       NearLabel modify_counter, offset_uncompressed_label;
1268       __ testl(string_length_flagged, Immediate(1));
1269       __ j(kNotZero, &offset_uncompressed_label);
1270       // Move to the start of the string: string_obj + value_offset + start_index.
1271       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1272       __ jmp(&modify_counter);
1273 
1274       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1275       __ Bind(&offset_uncompressed_label);
1276       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1277 
1278       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1279       // compare.
1280       __ Bind(&modify_counter);
1281     } else {
1282       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1283     }
1284     __ negl(counter);
1285     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1286   }
1287 
1288   if (mirror::kUseStringCompression) {
1289     NearLabel uncompressed_string_comparison;
1290     NearLabel comparison_done;
1291     __ testl(string_length_flagged, Immediate(1));
1292     __ j(kNotZero, &uncompressed_string_comparison);
1293 
1294     // Check if EAX (search_value) is ASCII.
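    // A compressed string stores only ASCII characters, so a search value above 127 can never
    // match and we can report "not found" without scanning.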
1295     __ cmpl(search_value, Immediate(127));
1296     __ j(kGreater, &not_found_label);
1297     // Comparing byte-per-byte.
1298     __ repne_scasb();
1299     __ jmp(&comparison_done);
1300 
1301     // Everything is set up for repne scasw:
1302     //   * Comparison address in EDI.
1303     //   * Counter in ECX.
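    //   * Comparand (the 16-bit char) in AX.
    // repne scasw compares AX with the word at [EDI], advancing EDI and decrementing ECX after
    // each comparison, and stops when a match sets ZF or when ECX reaches zero.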
1304     __ Bind(&uncompressed_string_comparison);
1305     __ repne_scasw();
1306     __ Bind(&comparison_done);
1307   } else {
1308     __ repne_scasw();
1309   }
1310   // Did we find a match?
1311   __ j(kNotEqual, &not_found_label);
1312 
1313   // Yes, we matched.  Compute the index of the result.
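  // After the scan, ECX holds the number of elements remaining after the matched one, so the
  // matched index is string_length - ECX - 1.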
1314   __ subl(string_length, counter);
1315   __ leal(out, Address(string_length, -1));
1316 
1317   NearLabel done;
1318   __ jmp(&done);
1319 
1320   // Failed to match; return -1.
1321   __ Bind(&not_found_label);
1322   __ movl(out, Immediate(-1));
1323 
1324   // And join up at the end.
1325   __ Bind(&done);
1326   if (slow_path != nullptr) {
1327     __ Bind(slow_path->GetExitLabel());
1328   }
1329 }
1330 
1331 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1332   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1333 }
1334 
1335 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1336   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1337 }
1338 
1339 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1340   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1341 }
1342 
1343 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1344   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1345 }
1346 
1347 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1348   LocationSummary* locations = new (allocator_) LocationSummary(
1349       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1350   InvokeRuntimeCallingConvention calling_convention;
1351   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1352   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1353   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1354   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1355   locations->SetOut(Location::RegisterLocation(EAX));
1356 }
1357 
1358 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1359   X86Assembler* assembler = GetAssembler();
1360   LocationSummary* locations = invoke->GetLocations();
1361 
1362   Register byte_array = locations->InAt(0).AsRegister<Register>();
1363   __ testl(byte_array, byte_array);
1364   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1365   codegen_->AddSlowPath(slow_path);
1366   __ j(kEqual, slow_path->GetEntryLabel());
1367 
1368   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1369   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1370   __ Bind(slow_path->GetExitLabel());
1371 }
1372 
1373 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1374   LocationSummary* locations =
1375       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1376   InvokeRuntimeCallingConvention calling_convention;
1377   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1378   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1379   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1380   locations->SetOut(Location::RegisterLocation(EAX));
1381 }
1382 
1383 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1384   // No need to emit code checking whether `locations->InAt(2)` is a null
1385   // pointer, as callers of the native method
1386   //
1387   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1388   //
1389   // all include a null check on `data` before calling that method.
1390   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1391   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1392 }
1393 
1394 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1395   LocationSummary* locations = new (allocator_) LocationSummary(
1396       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1397   InvokeRuntimeCallingConvention calling_convention;
1398   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1399   locations->SetOut(Location::RegisterLocation(EAX));
1400 }
1401 
1402 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1403   X86Assembler* assembler = GetAssembler();
1404   LocationSummary* locations = invoke->GetLocations();
1405 
1406   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1407   __ testl(string_to_copy, string_to_copy);
1408   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1409   codegen_->AddSlowPath(slow_path);
1410   __ j(kEqual, slow_path->GetEntryLabel());
1411 
1412   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1413   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1414   __ Bind(slow_path->GetExitLabel());
1415 }
1416 
1417 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1418   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1419   LocationSummary* locations =
1420       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1421   locations->SetInAt(0, Location::RequiresRegister());
1422   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1423   // Place srcEnd in ECX to save a move below.
1424   locations->SetInAt(2, Location::RegisterLocation(ECX));
1425   locations->SetInAt(3, Location::RequiresRegister());
1426   locations->SetInAt(4, Location::RequiresRegister());
1427 
1428   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1429   // We don't have enough registers to also reserve ECX, so it is saved and restored below.
1430   locations->AddTemp(Location::RegisterLocation(ESI));
1431   locations->AddTemp(Location::RegisterLocation(EDI));
1432 }
1433 
1434 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1435   X86Assembler* assembler = GetAssembler();
1436   LocationSummary* locations = invoke->GetLocations();
1437 
1438   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1439   // Location of data in char array buffer.
1440   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1441   // Location of char array data in string.
1442   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1443 
1444   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1445   Register obj = locations->InAt(0).AsRegister<Register>();
1446   Location srcBegin = locations->InAt(1);
1447   int srcBegin_value =
1448     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1449   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1450   Register dst = locations->InAt(3).AsRegister<Register>();
1451   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1452 
1453   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1454   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1455   DCHECK_EQ(char_size, 2u);
1456 
1457   // Compute the number of chars (words) to move.
1458   // Save ECX, since we don't know if it will be used later.
1459   __ pushl(ECX);
1460   int stack_adjust = kX86WordSize;
1461   __ cfi().AdjustCFAOffset(stack_adjust);
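  // The CFI adjustment keeps the unwind information in sync with the explicit push, so stack
  // unwinding through this intrinsic remains correct.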
1462   DCHECK_EQ(srcEnd, ECX);
1463   if (srcBegin.IsConstant()) {
1464     __ subl(ECX, Immediate(srcBegin_value));
1465   } else {
1466     DCHECK(srcBegin.IsRegister());
1467     __ subl(ECX, srcBegin.AsRegister<Register>());
1468   }
1469 
1470   NearLabel done;
1471   if (mirror::kUseStringCompression) {
1472     // Location of the count field in the String object.
1473     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1474     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1475     DCHECK_EQ(c_char_size, 1u);
1476     __ pushl(EAX);
1477     __ cfi().AdjustCFAOffset(stack_adjust);
1478 
1479     NearLabel copy_loop, copy_uncompressed;
1480     __ testl(Address(obj, count_offset), Immediate(1));
1481     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1482                   "Expecting 0=compressed, 1=uncompressed");
1483     __ j(kNotZero, &copy_uncompressed);
1484     // Compute the address of the source string by adding the number of chars from
1485     // the source beginning to the value offset of a string.
1486     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1487 
1488     // Start the loop that copies the String's value into the char array.
1489     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1490     __ Bind(&copy_loop);
1491     __ jecxz(&done);
1492     // Use EAX temporary (convert byte from ESI to word).
1493     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1494     __ movzxb(EAX, Address(ESI, 0));
1495     __ movw(Address(EDI, 0), EAX);
1496     __ leal(EDI, Address(EDI, char_size));
1497     __ leal(ESI, Address(ESI, c_char_size));
1498     // TODO: Add support for LOOP to X86Assembler.
1499     __ subl(ECX, Immediate(1));
1500     __ jmp(&copy_loop);
1501     __ Bind(&copy_uncompressed);
1502   }
1503 
1504   // Do the copy for uncompressed string.
1505   // Compute the address of the destination buffer.
1506   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1507   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1508   __ rep_movsw();
1509 
1510   __ Bind(&done);
1511   if (mirror::kUseStringCompression) {
1512     // Restore EAX.
1513     __ popl(EAX);
1514     __ cfi().AdjustCFAOffset(-stack_adjust);
1515   }
1516   // Restore ECX.
1517   __ popl(ECX);
1518   __ cfi().AdjustCFAOffset(-stack_adjust);
1519 }
1520 
1521 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1522   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1523   Location out_loc = locations->Out();
1524   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1525   // to avoid a SIGBUS.
1526   switch (size) {
1527     case DataType::Type::kInt8:
1528       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1529       break;
1530     case DataType::Type::kInt16:
1531       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1532       break;
1533     case DataType::Type::kInt32:
1534       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1535       break;
1536     case DataType::Type::kInt64:
1537       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1538       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
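      // Note: the 64-bit peek is performed as two 32-bit loads, so it is not a single atomic
      // access.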
1539       break;
1540     default:
1541       LOG(FATAL) << "Type not recognized for peek: " << size;
1542       UNREACHABLE();
1543   }
1544 }
1545 
1546 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1547   CreateLongToIntLocations(allocator_, invoke);
1548 }
1549 
1550 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1551   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1552 }
1553 
1554 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1555   CreateLongToIntLocations(allocator_, invoke);
1556 }
1557 
1558 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1559   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1560 }
1561 
1562 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1563   CreateLongToLongLocations(allocator_, invoke);
1564 }
1565 
1566 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1567   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1568 }
1569 
1570 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1571   CreateLongToIntLocations(allocator_, invoke);
1572 }
1573 
1574 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1575   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1576 }
1577 
1578 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1579                                          DataType::Type size,
1580                                          HInvoke* invoke) {
1581   LocationSummary* locations =
1582       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1583   locations->SetInAt(0, Location::RequiresRegister());
1584   HInstruction* value = invoke->InputAt(1);
1585   if (size == DataType::Type::kInt8) {
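    // Only EAX, EBX, ECX and EDX have byte-addressable forms on x86-32, so a non-constant byte
    // value must be constrained to one of them.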
1586     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1587   } else {
1588     locations->SetInAt(1, Location::RegisterOrConstant(value));
1589   }
1590 }
1591 
1592 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1593   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1594   Location value_loc = locations->InAt(1);
1595   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1596   // to avoid a SIGBUS.
1597   switch (size) {
1598     case DataType::Type::kInt8:
1599       if (value_loc.IsConstant()) {
1600         __ movb(Address(address, 0),
1601                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1602       } else {
1603         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1604       }
1605       break;
1606     case DataType::Type::kInt16:
1607       if (value_loc.IsConstant()) {
1608         __ movw(Address(address, 0),
1609                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1610       } else {
1611         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1612       }
1613       break;
1614     case DataType::Type::kInt32:
1615       if (value_loc.IsConstant()) {
1616         __ movl(Address(address, 0),
1617                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1618       } else {
1619         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1620       }
1621       break;
1622     case DataType::Type::kInt64:
1623       if (value_loc.IsConstant()) {
1624         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1625         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1626         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1627       } else {
1628         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1629         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
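        // As with the 64-bit peek, this is two 32-bit stores rather than one atomic access.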
1630       }
1631       break;
1632     default:
1633       LOG(FATAL) << "Type not recognized for poke: " << size;
1634       UNREACHABLE();
1635   }
1636 }
1637 
1638 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1639   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1640 }
1641 
1642 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1643   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1644 }
1645 
1646 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1647   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1648 }
1649 
1650 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1651   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1652 }
1653 
1654 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1655   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1656 }
1657 
1658 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1659   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1660 }
1661 
1662 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1663   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1664 }
1665 
1666 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1667   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1668 }
1669 
1670 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1671   LocationSummary* locations =
1672       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1673   locations->SetOut(Location::RequiresRegister());
1674 }
1675 
1676 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1677   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
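  // ART keeps the current Thread reachable through the fs segment on x86; PeerOffset is the
  // offset of the java.lang.Thread peer object within it, which is what currentThread() returns.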
1678   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1679 }
1680 
1681 static void GenUnsafeGet(HInvoke* invoke,
1682                          DataType::Type type,
1683                          bool is_volatile,
1684                          CodeGeneratorX86* codegen) {
1685   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1686   LocationSummary* locations = invoke->GetLocations();
1687   Location base_loc = locations->InAt(1);
1688   Register base = base_loc.AsRegister<Register>();
1689   Location offset_loc = locations->InAt(2);
1690   Register offset = offset_loc.AsRegisterPairLow<Register>();
1691   Location output_loc = locations->Out();
1692 
1693   switch (type) {
1694     case DataType::Type::kInt32: {
1695       Register output = output_loc.AsRegister<Register>();
1696       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1697       break;
1698     }
1699 
1700     case DataType::Type::kReference: {
1701       Register output = output_loc.AsRegister<Register>();
1702       if (gUseReadBarrier) {
1703         if (kUseBakerReadBarrier) {
1704           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1705           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1706               invoke, output_loc, base, src, /* needs_null_check= */ false);
1707         } else {
1708           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1709           codegen->GenerateReadBarrierSlow(
1710               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1711         }
1712       } else {
1713         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1714         __ MaybeUnpoisonHeapReference(output);
1715       }
1716       break;
1717     }
1718 
1719     case DataType::Type::kInt64: {
1720         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1721         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1722         if (is_volatile) {
1723           // Need to use an XMM register to read the 64-bit value atomically.
1724           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
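          // movsd performs a single 64-bit load; movd then extracts the low 32 bits and psrlq
          // shifts the high half down so it can be extracted the same way.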
1725           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1726           __ movd(output_lo, temp);
1727           __ psrlq(temp, Immediate(32));
1728           __ movd(output_hi, temp);
1729         } else {
1730           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1731           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1732         }
1733       }
1734       break;
1735 
1736     default:
1737       LOG(FATAL) << "Unsupported op size " << type;
1738       UNREACHABLE();
1739   }
1740 }
1741 
1742 static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
1743   switch (intrinsic) {
1744     case Intrinsics::kUnsafeGetObject:
1745     case Intrinsics::kUnsafeGetObjectVolatile:
1746     case Intrinsics::kJdkUnsafeGetObject:
1747     case Intrinsics::kJdkUnsafeGetObjectVolatile:
1748     case Intrinsics::kJdkUnsafeGetObjectAcquire:
1749       return true;
1750     default:
1751       break;
1752   }
1753   return false;
1754 }
1755 
1756 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1757                                           HInvoke* invoke,
1758                                           DataType::Type type,
1759                                           bool is_volatile) {
1760   bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
1761   LocationSummary* locations =
1762       new (allocator) LocationSummary(invoke,
1763                                       can_call
1764                                           ? LocationSummary::kCallOnSlowPath
1765                                           : LocationSummary::kNoCall,
1766                                       kIntrinsified);
1767   if (can_call && kUseBakerReadBarrier) {
1768     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1769   }
1770   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1771   locations->SetInAt(1, Location::RequiresRegister());
1772   locations->SetInAt(2, Location::RequiresRegister());
1773   if (type == DataType::Type::kInt64) {
1774     if (is_volatile) {
1775       // Need an XMM temporary to read the volatile 64-bit value atomically.
1776       locations->AddTemp(Location::RequiresFpuRegister());
1777       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1778     } else {
1779       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
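      // The low word of the output is written while `base` and `offset` are still needed for the
      // second 32-bit load, so the output pair must not share registers with the inputs.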
1780     }
1781   } else {
1782     locations->SetOut(Location::RequiresRegister(),
1783                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1784   }
1785 }
1786 
1787 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1788   VisitJdkUnsafeGet(invoke);
1789 }
1790 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1791   VisitJdkUnsafeGetVolatile(invoke);
1792 }
1793 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1794   VisitJdkUnsafeGetLong(invoke);
1795 }
1796 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1797   VisitJdkUnsafeGetLongVolatile(invoke);
1798 }
1799 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1800   VisitJdkUnsafeGetObject(invoke);
1801 }
1802 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1803   VisitJdkUnsafeGetObjectVolatile(invoke);
1804 }
1805 
1806 
1807 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1808   VisitJdkUnsafeGet(invoke);
1809 }
1810 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1811   VisitJdkUnsafeGetVolatile(invoke);
1812 }
1813 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1814   VisitJdkUnsafeGetLong(invoke);
1815 }
1816 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1817   VisitJdkUnsafeGetLongVolatile(invoke);
1818 }
1819 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1820   VisitJdkUnsafeGetObject(invoke);
1821 }
1822 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1823   VisitJdkUnsafeGetObjectVolatile(invoke);
1824 }
1825 
1826 
1827 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1828   CreateIntIntIntToIntLocations(
1829       allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ false);
1830 }
1831 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1832   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
1833 }
1834 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1835   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
1836 }
1837 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1838   CreateIntIntIntToIntLocations(
1839       allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ false);
1840 }
1841 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1842   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
1843 }
1844 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1845   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
1846 }
1847 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1848   CreateIntIntIntToIntLocations(
1849       allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ false);
1850 }
1851 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1852   CreateIntIntIntToIntLocations(
1853       allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
1854 }
1855 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1856   CreateIntIntIntToIntLocations(
1857       allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
1858 }
1859 
1860 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1861   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1862 }
1863 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1864   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1865 }
1866 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1867   GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1868 }
1869 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1870   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1871 }
1872 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1873   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1874 }
1875 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1876   GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1877 }
1878 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1879   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1880 }
1881 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1882   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1883 }
1884 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1885   GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1886 }
1887 
1888 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1889                                                        DataType::Type type,
1890                                                        HInvoke* invoke,
1891                                                        bool is_volatile) {
1892   LocationSummary* locations =
1893       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1894   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1895   locations->SetInAt(1, Location::RequiresRegister());
1896   locations->SetInAt(2, Location::RequiresRegister());
1897   locations->SetInAt(3, Location::RequiresRegister());
1898   if (type == DataType::Type::kReference) {
1899     // Need temp registers for card-marking.
1900     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1901     // Ensure the value is in a byte register.
1902     locations->AddTemp(Location::RegisterLocation(ECX));
1903   } else if (type == DataType::Type::kInt64 && is_volatile) {
1904     locations->AddTemp(Location::RequiresFpuRegister());
1905     locations->AddTemp(Location::RequiresFpuRegister());
1906   }
1907 }
1908 
1909 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1910   VisitJdkUnsafePut(invoke);
1911 }
1912 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1913   VisitJdkUnsafePutOrdered(invoke);
1914 }
1915 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1916   VisitJdkUnsafePutVolatile(invoke);
1917 }
1918 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1919   VisitJdkUnsafePutObject(invoke);
1920 }
1921 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1922   VisitJdkUnsafePutObjectOrdered(invoke);
1923 }
1924 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1925   VisitJdkUnsafePutObjectVolatile(invoke);
1926 }
1927 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1928   VisitJdkUnsafePutLong(invoke);
1929 }
1930 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1931   VisitJdkUnsafePutLongOrdered(invoke);
1932 }
1933 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1934   VisitJdkUnsafePutLongVolatile(invoke);
1935 }
1936 
1937 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
1938   CreateIntIntIntIntToVoidPlusTempsLocations(
1939       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1940 }
1941 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1942   CreateIntIntIntIntToVoidPlusTempsLocations(
1943       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1944 }
1945 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1946   CreateIntIntIntIntToVoidPlusTempsLocations(
1947       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1948 }
1949 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1950   CreateIntIntIntIntToVoidPlusTempsLocations(
1951       allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1952 }
1953 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
1954   CreateIntIntIntIntToVoidPlusTempsLocations(
1955       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1956 }
1957 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1958   CreateIntIntIntIntToVoidPlusTempsLocations(
1959       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1960 }
1961 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
1962   CreateIntIntIntIntToVoidPlusTempsLocations(
1963       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1964 }
1965 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
1966   CreateIntIntIntIntToVoidPlusTempsLocations(
1967       allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1968 }
1969 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
1970   CreateIntIntIntIntToVoidPlusTempsLocations(
1971       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1972 }
1973 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1974   CreateIntIntIntIntToVoidPlusTempsLocations(
1975       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1976 }
1977 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1978   CreateIntIntIntIntToVoidPlusTempsLocations(
1979       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1980 }
1981 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1982   CreateIntIntIntIntToVoidPlusTempsLocations(
1983       allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1984 }
1985 
1986 // We don't need special handling for ordered puts: they only require an AnyStore barrier, which
1987 // the x86 memory model already provides.
1988 static void GenUnsafePut(LocationSummary* locations,
1989                          DataType::Type type,
1990                          bool is_volatile,
1991                          CodeGeneratorX86* codegen) {
1992   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1993   Register base = locations->InAt(1).AsRegister<Register>();
1994   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1995   Location value_loc = locations->InAt(3);
1996 
1997   if (type == DataType::Type::kInt64) {
1998     Register value_lo = value_loc.AsRegisterPairLow<Register>();
1999     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2000     if (is_volatile) {
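      // Pack the two 32-bit halves into one XMM register so a single movsd stores all 64 bits
      // atomically.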
2001       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2002       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2003       __ movd(temp1, value_lo);
2004       __ movd(temp2, value_hi);
2005       __ punpckldq(temp1, temp2);
2006       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2007     } else {
2008       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2009       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2010     }
2011   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2012     Register temp = locations->GetTemp(0).AsRegister<Register>();
2013     __ movl(temp, value_loc.AsRegister<Register>());
2014     __ PoisonHeapReference(temp);
2015     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2016   } else {
2017     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2018   }
2019 
2020   if (is_volatile) {
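    // x86 only allows a store to be reordered with a later load, so a StoreLoad barrier after
    // the store is all that a volatile put requires here.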
2021     codegen->MemoryFence();
2022   }
2023 
2024   if (type == DataType::Type::kReference) {
2025     bool value_can_be_null = true;  // TODO: Worth finding out this information?
2026     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2027                         locations->GetTemp(1).AsRegister<Register>(),
2028                         base,
2029                         value_loc.AsRegister<Register>(),
2030                         value_can_be_null);
2031   }
2032 }
2033 
2034 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2035   VisitJdkUnsafePut(invoke);
2036 }
2037 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2038   VisitJdkUnsafePutOrdered(invoke);
2039 }
2040 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2041   VisitJdkUnsafePutVolatile(invoke);
2042 }
2043 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2044   VisitJdkUnsafePutObject(invoke);
2045 }
2046 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2047   VisitJdkUnsafePutObjectOrdered(invoke);
2048 }
2049 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2050   VisitJdkUnsafePutObjectVolatile(invoke);
2051 }
2052 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2053   VisitJdkUnsafePutLong(invoke);
2054 }
2055 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2056   VisitJdkUnsafePutLongOrdered(invoke);
2057 }
2058 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2059   VisitJdkUnsafePutLongVolatile(invoke);
2060 }
2061 
2062 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2063   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2064 }
2065 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2066   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2067 }
2068 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2069   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2070 }
2071 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2072   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2073 }
2074 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
2075   GenUnsafePut(
2076       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2077 }
2078 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2079   GenUnsafePut(
2080       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2081 }
2082 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2083   GenUnsafePut(
2084       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2085 }
2086 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2087   GenUnsafePut(
2088       invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2089 }
2090 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2091   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2092 }
2093 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2094   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2095 }
2096 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2097   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2098 }
2099 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2100   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2101 }
2102 
2103 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2104                                        DataType::Type type,
2105                                        HInvoke* invoke) {
2106   const bool can_call = gUseReadBarrier &&
2107                         kUseBakerReadBarrier &&
2108                         IsUnsafeCASObject(invoke);
2109   LocationSummary* locations =
2110       new (allocator) LocationSummary(invoke,
2111                                       can_call
2112                                           ? LocationSummary::kCallOnSlowPath
2113                                           : LocationSummary::kNoCall,
2114                                       kIntrinsified);
2115   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
2116   locations->SetInAt(1, Location::RequiresRegister());
2117   // Offset is a long, but in 32-bit mode we only need the low word.
2118   // Can we update the invoke here to remove a TypeConvert to Long?
2119   locations->SetInAt(2, Location::RequiresRegister());
2120   // Expected value must be in EAX or EDX:EAX.
2121   // For long, new value must be in ECX:EBX.
2122   if (type == DataType::Type::kInt64) {
2123     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2124     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2125   } else {
2126     locations->SetInAt(3, Location::RegisterLocation(EAX));
2127     locations->SetInAt(4, Location::RequiresRegister());
2128   }
2129 
2130   // Force a byte register for the output.
2131   locations->SetOut(Location::RegisterLocation(EAX));
2132   if (type == DataType::Type::kReference) {
2133     // Need temporary registers for card-marking, and possibly for
2134     // (Baker) read barrier.
2135     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
2136     // Need a byte register for marking.
2137     locations->AddTemp(Location::RegisterLocation(ECX));
2138   }
2139 }
2140 
2141 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2142   VisitJdkUnsafeCASInt(invoke);
2143 }
2144 
2145 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2146   VisitJdkUnsafeCASLong(invoke);
2147 }
2148 
2149 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2150   VisitJdkUnsafeCASObject(invoke);
2151 }
2152 
2153 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2154   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2155   VisitJdkUnsafeCompareAndSetInt(invoke);
2156 }
2157 
2158 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2159   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2160   VisitJdkUnsafeCompareAndSetLong(invoke);
2161 }
2162 
2163 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2164   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2165   VisitJdkUnsafeCompareAndSetObject(invoke);
2166 }
2167 
2168 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2169   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
2170 }
2171 
2172 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2173   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
2174 }
2175 
2176 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2177   // The only supported read barrier implementation is the Baker-style read barriers.
2178   if (gUseReadBarrier && !kUseBakerReadBarrier) {
2179     return;
2180   }
2181 
2182   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2183 }
2184 
2185 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2186                                       CodeGeneratorX86* codegen,
2187                                       Location expected_value,
2188                                       Location new_value,
2189                                       Register base,
2190                                       Register offset,
2191                                       // Only necessary for floating point
2192                                       Register temp = Register::kNoRegister) {
2193   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2194 
2195   if (DataType::Kind(type) == DataType::Type::kInt32) {
2196     DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2197   }
2198 
2199   // The address of the field within the holding object.
2200   Address field_addr(base, offset, TIMES_1, 0);
2201 
2202   switch (type) {
2203     case DataType::Type::kBool:
2204     case DataType::Type::kInt8:
2205       __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2206       break;
2207     case DataType::Type::kInt16:
2208     case DataType::Type::kUint16:
2209       __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2210       break;
2211     case DataType::Type::kInt32:
2212       __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2213       break;
2214     case DataType::Type::kFloat32: {
2215       // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2216       DCHECK_NE(temp, EAX);
2217       // EAX is both an input and an output for cmpxchg.
2218       codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2219       codegen->Move32(Location::RegisterLocation(temp), new_value);
2220       __ LockCmpxchgl(field_addr, temp);
2221       break;
2222     }
2223     case DataType::Type::kInt64:
2224       // Ensure the expected value is in EDX:EAX and that the new
2225       // value is in ECX:EBX (required by the CMPXCHG8B instruction).
2226       DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2227       DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2228       DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2229       DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2230       __ LockCmpxchg8b(field_addr);
2231       break;
2232     default:
2233       LOG(FATAL) << "Unexpected CAS type " << type;
2234   }
2235   // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2236   // don't need scheduling barriers at this time.
2237 }
2238 
2239 static void GenPrimitiveCAS(DataType::Type type,
2240                             CodeGeneratorX86* codegen,
2241                             Location expected_value,
2242                             Location new_value,
2243                             Register base,
2244                             Register offset,
2245                             Location out,
2246                             // Only necessary for floating point
2247                             Register temp = Register::kNoRegister,
2248                             bool is_cmpxchg = false) {
2249   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2250 
2251   if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2252     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2253   }
2254 
2255   GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2256 
2257   if (is_cmpxchg) {
2258     // Sign-extend, zero-extend, or move the result if necessary.
2259     switch (type) {
2260       case DataType::Type::kBool:
2261         __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2262         break;
2263       case DataType::Type::kInt8:
2264         __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2265         break;
2266       case DataType::Type::kInt16:
2267         __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2268         break;
2269       case DataType::Type::kUint16:
2270         __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2271         break;
2272       case DataType::Type::kFloat32:
2273         __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2274         break;
2275       default:
2276         // Nothing to do
2277         break;
2278     }
2279   } else {
2280     // Convert ZF into the Boolean result.
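    // (LOCK CMPXCHG sets ZF only when the comparison succeeded and the new value was stored.)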
2281     __ setb(kZero, out.AsRegister<Register>());
2282     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2283   }
2284 }
2285 
2286 static void GenReferenceCAS(HInvoke* invoke,
2287                             CodeGeneratorX86* codegen,
2288                             Location expected_value,
2289                             Location new_value,
2290                             Register base,
2291                             Register offset,
2292                             Register temp,
2293                             Register temp2,
2294                             bool is_cmpxchg = false) {
2295   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2296   LocationSummary* locations = invoke->GetLocations();
2297   Location out = locations->Out();
2298 
2299   // The address of the field within the holding object.
2300   Address field_addr(base, offset, TIMES_1, 0);
2301 
2302   Register value = new_value.AsRegister<Register>();
2303   Register expected = expected_value.AsRegister<Register>();
2304   DCHECK_EQ(expected, EAX);
2305   DCHECK_NE(temp, temp2);
2306 
2307   if (gUseReadBarrier && kUseBakerReadBarrier) {
2308     // Need to make sure the reference stored in the field is a to-space
2309     // one before attempting the CAS or the CAS could fail incorrectly.
2310     codegen->GenerateReferenceLoadWithBakerReadBarrier(
2311         invoke,
2312         // Unused, used only as a "temporary" within the read barrier.
2313         Location::RegisterLocation(temp),
2314         base,
2315         field_addr,
2316         /* needs_null_check= */ false,
2317         /* always_update_field= */ true,
2318         &temp2);
2319   }
2320   bool base_equals_value = (base == value);
2321   if (kPoisonHeapReferences) {
2322     if (base_equals_value) {
2323       // If `base` and `value` are the same register location, move
2324       // `value` to a temporary register.  This way, poisoning
2325       // `value` won't invalidate `base`.
2326       value = temp;
2327       __ movl(value, base);
2328     }
2329 
2330     // Check that the register allocator did not assign the location
2331     // of `expected` (EAX) to `value` nor to `base`, so that heap
2332     // poisoning (when enabled) works as intended below.
2333     // - If `value` were equal to `expected`, both references would
2334     //   be poisoned twice, meaning they would not be poisoned at
2335     //   all, as heap poisoning uses address negation.
2336     // - If `base` were equal to `expected`, poisoning `expected`
2337     //   would invalidate `base`.
2338     DCHECK_NE(value, expected);
2339     DCHECK_NE(base, expected);
2340     __ PoisonHeapReference(expected);
2341     __ PoisonHeapReference(value);
2342   }
2343   __ LockCmpxchgl(field_addr, value);
2344 
2345   // LOCK CMPXCHG has full barrier semantics, and we don't need
2346   // scheduling barriers at this time.
2347 
2348   if (is_cmpxchg) {
2349     DCHECK_EQ(out.AsRegister<Register>(), EAX);
2350     __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2351   } else {
2352     // Convert ZF into the Boolean result.
2353     __ setb(kZero, out.AsRegister<Register>());
2354     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2355   }
2356 
2357   // Mark card for object if the new value is stored.
2358   bool value_can_be_null = true;  // TODO: Worth finding out this information?
2359   NearLabel skip_mark_gc_card;
2360   __ j(kNotZero, &skip_mark_gc_card);
2361   codegen->MarkGCCard(temp, temp2, base, value, value_can_be_null);
2362   __ Bind(&skip_mark_gc_card);
2363 
2364   // If heap poisoning is enabled, we need to unpoison the values
2365   // that were poisoned earlier.
2366   if (kPoisonHeapReferences) {
2367     if (base_equals_value) {
2368       // `value` has been moved to a temporary register, no need to
2369       // unpoison it.
2370     } else {
2371       // Ensure `value` is different from `out`, so that unpoisoning
2372       // the former does not invalidate the latter.
2373       DCHECK_NE(value, out.AsRegister<Register>());
2374       __ UnpoisonHeapReference(value);
2375     }
2376   }
2377   // Do not unpoison the reference contained in register
2378   // `expected`, as it is the same as register `out` (EAX).
2379 }
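
// Illustrative sketch (not part of the generated code): as noted above, heap
// reference poisoning is plain 32-bit negation, which is why the code is
// careful never to poison the same register twice -- applying the
// transformation twice yields the original, unpoisoned reference.
static constexpr uint32_t PoisonForIllustration(uint32_t reference) { return 0u - reference; }
static_assert(PoisonForIllustration(PoisonForIllustration(0x12345678u)) == 0x12345678u,
              "poisoning a reference twice leaves it unpoisoned");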
2380 
2381 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2382   LocationSummary* locations = invoke->GetLocations();
2383 
2384   Register base = locations->InAt(1).AsRegister<Register>();
2385   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2386   Location expected_value = locations->InAt(3);
2387   Location new_value = locations->InAt(4);
2388   Location out = locations->Out();
2389   DCHECK_EQ(out.AsRegister<Register>(), EAX);
2390 
2391   if (type == DataType::Type::kReference) {
2392     // The only read barrier implementation supporting the
2393     // UnsafeCASObject intrinsic is the Baker-style read barriers.
2394     DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2395 
2396     Register temp = locations->GetTemp(0).AsRegister<Register>();
2397     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2398     GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2399   } else {
2400     DCHECK(!DataType::IsFloatingPointType(type));
2401     GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2402   }
2403 }
2404 
2405 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2406   VisitJdkUnsafeCASInt(invoke);
2407 }
2408 
2409 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2410   VisitJdkUnsafeCASLong(invoke);
2411 }
2412 
2413 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2414   // The only read barrier implementation supporting the
2415   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2416   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2417 
2418   GenCAS(DataType::Type::kReference, invoke, codegen_);
2419 }
2420 
2421 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2422   // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2423   VisitJdkUnsafeCompareAndSetInt(invoke);
2424 }
2425 
2426 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2427   // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2428   VisitJdkUnsafeCompareAndSetLong(invoke);
2429 }
2430 
2431 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2432   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2433   VisitJdkUnsafeCompareAndSetObject(invoke);
2434 }
2435 
2436 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2437   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2438 }
2439 
2440 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2441   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2442 }
2443 
2444 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2445   // The only supported read barrier implementation is the Baker-style read barriers.
2446   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2447 
2448   GenCAS(DataType::Type::kReference, invoke, codegen_);
2449 }
2450 
2451 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2452   LocationSummary* locations =
2453       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2454   locations->SetInAt(0, Location::RequiresRegister());
2455   locations->SetOut(Location::SameAsFirstInput());
2456   locations->AddTemp(Location::RequiresRegister());
2457 }
2458 
2459 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2460                      X86Assembler* assembler) {
2461   Immediate imm_shift(shift);
2462   Immediate imm_mask(mask);
2463   __ movl(temp, reg);
2464   __ shrl(reg, imm_shift);
2465   __ andl(temp, imm_mask);
2466   __ andl(reg, imm_mask);
2467   __ shll(temp, imm_shift);
2468   __ orl(reg, temp);
2469 }
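
// Illustrative sketch (not part of the generated code): one SwapBits round is
// equivalent to the portable expression below, exchanging each group of
// `shift` bits selected by `mask` with its neighboring group.
static constexpr uint32_t SwapBitsForIllustration(uint32_t x, int shift, uint32_t mask) {
  return ((x >> shift) & mask) | ((x & mask) << shift);
}
static_assert(SwapBitsForIllustration(0b10u, 1, 0x55555555u) == 0b01u,
              "adjacent bits are exchanged by the shift == 1 round");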
2470 
2471 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2472   X86Assembler* assembler = GetAssembler();
2473   LocationSummary* locations = invoke->GetLocations();
2474 
2475   Register reg = locations->InAt(0).AsRegister<Register>();
2476   Register temp = locations->GetTemp(0).AsRegister<Register>();
2477 
2478   /*
2479    * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2480    * swapping bits to reverse bits in a number x. Using bswap saves instructions
2481    * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2482    * x = bswap x
2483    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2484    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2485    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2486    */
2487   __ bswapl(reg);
2488   SwapBits(reg, temp, 1, 0x55555555, assembler);
2489   SwapBits(reg, temp, 2, 0x33333333, assembler);
2490   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2491 }
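
// Illustrative sketch (not part of the generated code): the full sequence
// generated above (BSWAP followed by three SwapBits rounds), written as
// portable C++ so the masks and shifts can be checked in isolation.
static constexpr uint32_t ReverseBits32ForIllustration(uint32_t x) {
  x = (x >> 24) | ((x >> 8) & 0x0000FF00u) | ((x << 8) & 0x00FF0000u) | (x << 24);  // bswap
  x = ((x >> 1) & 0x55555555u) | ((x & 0x55555555u) << 1);
  x = ((x >> 2) & 0x33333333u) | ((x & 0x33333333u) << 2);
  x = ((x >> 4) & 0x0F0F0F0Fu) | ((x & 0x0F0F0F0Fu) << 4);
  return x;
}
static_assert(ReverseBits32ForIllustration(0x00000001u) == 0x80000000u, "bit 0 moves to bit 31");
static_assert(ReverseBits32ForIllustration(0x80000000u) == 0x00000001u, "bit 31 moves to bit 0");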
2492 
2493 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2494   LocationSummary* locations =
2495       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2496   locations->SetInAt(0, Location::RequiresRegister());
2497   locations->SetOut(Location::SameAsFirstInput());
2498   locations->AddTemp(Location::RequiresRegister());
2499 }
2500 
2501 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2502   X86Assembler* assembler = GetAssembler();
2503   LocationSummary* locations = invoke->GetLocations();
2504 
2505   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2506   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2507   Register temp = locations->GetTemp(0).AsRegister<Register>();
2508 
2509   // We want to swap high/low, then bswap each one, and then do the same
2510   // as a 32 bit reverse.
2511   // Exchange high and low.
2512   __ movl(temp, reg_low);
2513   __ movl(reg_low, reg_high);
2514   __ movl(reg_high, temp);
2515 
2516   // bit-reverse low
2517   __ bswapl(reg_low);
2518   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2519   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2520   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2521 
2522   // bit-reverse high
2523   __ bswapl(reg_high);
2524   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2525   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2526   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2527 }
2528 
2529 static void CreateBitCountLocations(
2530     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2531   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2532     // Do nothing if there is no popcnt support. This results in generating
2533     // a call for the intrinsic rather than direct code.
2534     return;
2535   }
2536   LocationSummary* locations =
2537       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2538   if (is_long) {
2539     locations->AddTemp(Location::RequiresRegister());
2540   }
2541   locations->SetInAt(0, Location::Any());
2542   locations->SetOut(Location::RequiresRegister());
2543 }
2544 
2545 static void GenBitCount(X86Assembler* assembler,
2546                         CodeGeneratorX86* codegen,
2547                         HInvoke* invoke, bool is_long) {
2548   LocationSummary* locations = invoke->GetLocations();
2549   Location src = locations->InAt(0);
2550   Register out = locations->Out().AsRegister<Register>();
2551 
2552   if (invoke->InputAt(0)->IsConstant()) {
2553     // Evaluate this at compile time.
2554     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2555     int32_t result = is_long
2556         ? POPCOUNT(static_cast<uint64_t>(value))
2557         : POPCOUNT(static_cast<uint32_t>(value));
2558     codegen->Load32BitValue(out, result);
2559     return;
2560   }
2561 
2562   // Handle the non-constant cases.
2563   if (!is_long) {
2564     if (src.IsRegister()) {
2565       __ popcntl(out, src.AsRegister<Register>());
2566     } else {
2567       DCHECK(src.IsStackSlot());
2568       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2569     }
2570   } else {
2571     // The 64-bit case needs to worry about two parts.
2572     Register temp = locations->GetTemp(0).AsRegister<Register>();
2573     if (src.IsRegisterPair()) {
2574       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2575       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2576     } else {
2577       DCHECK(src.IsDoubleStackSlot());
2578       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2579       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2580     }
2581     __ addl(out, temp);
2582   }
2583 }
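
// Illustrative sketch (not part of the generated code): the 64-bit path above
// simply adds the population counts of the two 32-bit halves, which is what
// the pair of POPCNT instructions followed by ADDL computes.
static constexpr int PopCount32ForIllustration(uint32_t x) {
  int count = 0;
  while (x != 0u) {
    x &= x - 1u;  // Clear the lowest set bit.
    ++count;
  }
  return count;
}
static_assert(PopCount32ForIllustration(0xF0F0F0F0u) + PopCount32ForIllustration(0x00000003u) == 18,
              "Long.bitCount of a value with low word 0xF0F0F0F0 and high word 0x00000003 is 18");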
2584 
2585 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2586   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2587 }
2588 
2589 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2590   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2591 }
2592 
2593 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2594   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2595 }
2596 
2597 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2598   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2599 }
2600 
2601 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2602   LocationSummary* locations =
2603       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2604   if (is_long) {
2605     locations->SetInAt(0, Location::RequiresRegister());
2606   } else {
2607     locations->SetInAt(0, Location::Any());
2608   }
2609   locations->SetOut(Location::RequiresRegister());
2610 }
2611 
2612 static void GenLeadingZeros(X86Assembler* assembler,
2613                             CodeGeneratorX86* codegen,
2614                             HInvoke* invoke, bool is_long) {
2615   LocationSummary* locations = invoke->GetLocations();
2616   Location src = locations->InAt(0);
2617   Register out = locations->Out().AsRegister<Register>();
2618 
2619   if (invoke->InputAt(0)->IsConstant()) {
2620     // Evaluate this at compile time.
2621     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2622     if (value == 0) {
2623       value = is_long ? 64 : 32;
2624     } else {
2625       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2626     }
2627     codegen->Load32BitValue(out, value);
2628     return;
2629   }
2630 
2631   // Handle the non-constant cases.
2632   if (!is_long) {
2633     if (src.IsRegister()) {
2634       __ bsrl(out, src.AsRegister<Register>());
2635     } else {
2636       DCHECK(src.IsStackSlot());
2637       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2638     }
2639 
2640     // BSR sets ZF if the input was zero; in that case the output register is undefined.
2641     NearLabel all_zeroes, done;
2642     __ j(kEqual, &all_zeroes);
2643 
2644     // Correct the result from BSR to get the final CLZ result.
2645     __ xorl(out, Immediate(31));
2646     __ jmp(&done);
2647 
2648     // Fix the zero case with the expected result.
2649     __ Bind(&all_zeroes);
2650     __ movl(out, Immediate(32));
2651 
2652     __ Bind(&done);
2653     return;
2654   }
2655 
2656   // 64 bit case needs to worry about both parts of the register.
2657   DCHECK(src.IsRegisterPair());
2658   Register src_lo = src.AsRegisterPairLow<Register>();
2659   Register src_hi = src.AsRegisterPairHigh<Register>();
2660   NearLabel handle_low, done, all_zeroes;
2661 
2662   // Is the high word zero?
2663   __ testl(src_hi, src_hi);
2664   __ j(kEqual, &handle_low);
2665 
2666   // High word is not zero. We know that the BSR result is defined in this case.
2667   __ bsrl(out, src_hi);
2668 
2669   // Correct the result from BSR to get the final CLZ result.
2670   __ xorl(out, Immediate(31));
2671   __ jmp(&done);
2672 
2673   // High word was zero.  We have to compute the low word count and add 32.
2674   __ Bind(&handle_low);
2675   __ bsrl(out, src_lo);
2676   __ j(kEqual, &all_zeroes);
2677 
2678   // We had a valid result.  Use an XOR to both correct the result and add 32.
2679   __ xorl(out, Immediate(63));
2680   __ jmp(&done);
2681 
2682   // All zero case.
2683   __ Bind(&all_zeroes);
2684   __ movl(out, Immediate(64));
2685 
2686   __ Bind(&done);
2687 }
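
// Illustrative sketch (not part of the generated code): BSR returns the index
// of the most significant set bit, so for non-zero x the leading-zero count is
// 31 - bsr(x). Since bsr(x) is in [0, 31], 31 - bsr(x) == 31 ^ bsr(x), which is
// the XOR trick used above; the 64-bit path uses 63 ^ bsr(low) to fold in the
// extra 32 when the high word is zero.
static constexpr int BsrForIllustration(uint32_t x) {  // Precondition: x != 0.
  int index = 0;
  while ((x >> 1) != 0u) {
    x >>= 1;
    ++index;
  }
  return index;
}
static_assert((31 ^ BsrForIllustration(0x00010000u)) == 15, "clz32(0x00010000) == 15");
static_assert((63 ^ BsrForIllustration(0x00010000u)) == 47,
              "when the high word is zero, the same BSR result yields 32 + clz32(low)");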
2688 
2689 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2690   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2691 }
2692 
2693 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2694   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2695 }
2696 
2697 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2698   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2699 }
2700 
2701 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2702   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2703 }
2704 
2705 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2706   LocationSummary* locations =
2707       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2708   if (is_long) {
2709     locations->SetInAt(0, Location::RequiresRegister());
2710   } else {
2711     locations->SetInAt(0, Location::Any());
2712   }
2713   locations->SetOut(Location::RequiresRegister());
2714 }
2715 
2716 static void GenTrailingZeros(X86Assembler* assembler,
2717                              CodeGeneratorX86* codegen,
2718                              HInvoke* invoke, bool is_long) {
2719   LocationSummary* locations = invoke->GetLocations();
2720   Location src = locations->InAt(0);
2721   Register out = locations->Out().AsRegister<Register>();
2722 
2723   if (invoke->InputAt(0)->IsConstant()) {
2724     // Evaluate this at compile time.
2725     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2726     if (value == 0) {
2727       value = is_long ? 64 : 32;
2728     } else {
2729       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2730     }
2731     codegen->Load32BitValue(out, value);
2732     return;
2733   }
2734 
2735   // Handle the non-constant cases.
2736   if (!is_long) {
2737     if (src.IsRegister()) {
2738       __ bsfl(out, src.AsRegister<Register>());
2739     } else {
2740       DCHECK(src.IsStackSlot());
2741       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2742     }
2743 
2744     // BSF sets ZF if the input was zero; in that case the output register is undefined.
2745     NearLabel done;
2746     __ j(kNotEqual, &done);
2747 
2748     // Fix the zero case with the expected result.
2749     __ movl(out, Immediate(32));
2750 
2751     __ Bind(&done);
2752     return;
2753   }
2754 
2755   // 64 bit case needs to worry about both parts of the register.
2756   DCHECK(src.IsRegisterPair());
2757   Register src_lo = src.AsRegisterPairLow<Register>();
2758   Register src_hi = src.AsRegisterPairHigh<Register>();
2759   NearLabel done, all_zeroes;
2760 
2761   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2762   __ bsfl(out, src_lo);
2763   __ j(kNotEqual, &done);
2764 
2765   // Low word was zero.  We have to compute the high word count and add 32.
2766   __ bsfl(out, src_hi);
2767   __ j(kEqual, &all_zeroes);
2768 
2769   // We had a valid result.  Add 32 to account for the low word being zero.
2770   __ addl(out, Immediate(32));
2771   __ jmp(&done);
2772 
2773   // All zero case.
2774   __ Bind(&all_zeroes);
2775   __ movl(out, Immediate(64));
2776 
2777   __ Bind(&done);
2778 }
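
// Illustrative sketch (not part of the generated code): BSF returns the index
// of the least significant set bit, which is already the trailing-zero count,
// so no correction is needed; the 64-bit path only adds 32 when the low word
// is zero and the answer must come from the high word.
static constexpr int CtzForIllustration(uint32_t low, uint32_t high) {
  if (low == 0u && high == 0u) {
    return 64;
  }
  uint32_t word = (low != 0u) ? low : high;
  int count = (low != 0u) ? 0 : 32;
  while ((word & 1u) == 0u) {
    word >>= 1;
    ++count;
  }
  return count;
}
static_assert(CtzForIllustration(0x00000008u, 0u) == 3, "answer taken from the low word");
static_assert(CtzForIllustration(0u, 0x00000008u) == 35, "low word zero: 32 + ctz(high)");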
2779 
2780 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2781   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2782 }
2783 
2784 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2785   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2786 }
2787 
2788 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2789   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2790 }
2791 
2792 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2793   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2794 }
2795 
2796 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2797   return instruction->InputAt(input0) == instruction->InputAt(input1);
2798 }
2799 
2800 // Compute base address for the System.arraycopy intrinsic in `base`.
2801 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2802                                           DataType::Type type,
2803                                           const Register& array,
2804                                           const Location& pos,
2805                                           const Register& base) {
2806   // This routine is only used by the SystemArrayCopy intrinsic at the
2807   // moment. We can allow DataType::Type::kReference as `type` to implement
2808   // the SystemArrayCopyChar intrinsic.
2809   DCHECK_EQ(type, DataType::Type::kReference);
2810   const int32_t element_size = DataType::Size(type);
2811   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2812   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2813 
2814   if (pos.IsConstant()) {
2815     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2816     __ leal(base, Address(array, element_size * constant + data_offset));
2817   } else {
2818     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2819   }
2820 }
2821 
2822 // Compute end source address for the System.arraycopy intrinsic in `end`.
2823 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2824                                          DataType::Type type,
2825                                          const Location& copy_length,
2826                                          const Register& base,
2827                                          const Register& end) {
2828   // This routine is only used by the SystemArrayCopy intrinsic at the
2829   // moment. We can allow DataType::Type::kReference as `type` to implement
2830   // the SystemArrayCopyChar intrinsic.
2831   DCHECK_EQ(type, DataType::Type::kReference);
2832   const int32_t element_size = DataType::Size(type);
2833   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2834 
2835   if (copy_length.IsConstant()) {
2836     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2837     __ leal(end, Address(base, element_size * constant));
2838   } else {
2839     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2840   }
2841 }
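
// Illustrative sketch (not part of the generated code): the two LEA-based
// helpers above compute
//   base = array + data_offset + pos * element_size
//   end  = base + length * element_size
// i.e. the address of the first element to copy and the address one past the
// last. The constants below are assumptions for illustration only (32-bit
// heap references and a 12-byte array header), not values queried from the
// runtime.
static constexpr uint32_t SystemArrayCopyBaseForIllustration(uint32_t array, uint32_t pos) {
  constexpr uint32_t kIllustrativeElementSize = 4u;  // Assumed sizeof(HeapReference<mirror::Object>).
  constexpr uint32_t kIllustrativeDataOffset = 12u;  // Assumed mirror::Array::DataOffset(4).
  return array + kIllustrativeDataOffset + pos * kIllustrativeElementSize;
}
static_assert(SystemArrayCopyBaseForIllustration(0x1000u, 3u) == 0x1018u,
              "element 3 of an array object at 0x1000 starts at 0x1018");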
2842 
2843 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2844   // The only read barrier implementation supporting the
2845   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2846   if (gUseReadBarrier && !kUseBakerReadBarrier) {
2847     return;
2848   }
2849 
2850   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2851   if (invoke->GetLocations() != nullptr) {
2852     // Need a byte register for marking.
2853     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2854 
2855     static constexpr size_t kSrc = 0;
2856     static constexpr size_t kSrcPos = 1;
2857     static constexpr size_t kDest = 2;
2858     static constexpr size_t kDestPos = 3;
2859     static constexpr size_t kLength = 4;
2860 
2861     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2862         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2863         !invoke->InputAt(kLength)->IsIntConstant()) {
2864       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2865           !IsSameInput(invoke, kSrcPos, kLength) &&
2866           !IsSameInput(invoke, kDestPos, kLength) &&
2867           !IsSameInput(invoke, kSrc, kDest)) {
2868         // Not enough registers, make the length also take a stack slot.
2869         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2870       }
2871     }
2872   }
2873 }
2874 
2875 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2876   // The only read barrier implementation supporting the
2877   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2878   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2879 
2880   X86Assembler* assembler = GetAssembler();
2881   LocationSummary* locations = invoke->GetLocations();
2882 
2883   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2884   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2885   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2886   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2887   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2888 
2889   Register src = locations->InAt(0).AsRegister<Register>();
2890   Location src_pos = locations->InAt(1);
2891   Register dest = locations->InAt(2).AsRegister<Register>();
2892   Location dest_pos = locations->InAt(3);
2893   Location length_arg = locations->InAt(4);
2894   Location length = length_arg;
2895   Location temp1_loc = locations->GetTemp(0);
2896   Register temp1 = temp1_loc.AsRegister<Register>();
2897   Location temp2_loc = locations->GetTemp(1);
2898   Register temp2 = temp2_loc.AsRegister<Register>();
2899 
2900   SlowPathCode* intrinsic_slow_path =
2901       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2902   codegen_->AddSlowPath(intrinsic_slow_path);
2903 
2904   NearLabel conditions_on_positions_validated;
2905   SystemArrayCopyOptimizations optimizations(invoke);
2906 
2907   // If source and destination are the same, we go to slow path if we need to do
2908   // forward copying.
2909   if (src_pos.IsConstant()) {
2910     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2911     if (dest_pos.IsConstant()) {
2912       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2913       if (optimizations.GetDestinationIsSource()) {
2914         // Checked when building locations.
2915         DCHECK_GE(src_pos_constant, dest_pos_constant);
2916       } else if (src_pos_constant < dest_pos_constant) {
2917         __ cmpl(src, dest);
2918         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2919       }
2920     } else {
2921       if (!optimizations.GetDestinationIsSource()) {
2922         __ cmpl(src, dest);
2923         __ j(kNotEqual, &conditions_on_positions_validated);
2924       }
2925       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2926       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2927     }
2928   } else {
2929     if (!optimizations.GetDestinationIsSource()) {
2930       __ cmpl(src, dest);
2931       __ j(kNotEqual, &conditions_on_positions_validated);
2932     }
2933     if (dest_pos.IsConstant()) {
2934       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2935       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2936       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2937     } else {
2938       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2939       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2940     }
2941   }
2942 
2943   __ Bind(&conditions_on_positions_validated);
2944 
2945   if (!optimizations.GetSourceIsNotNull()) {
2946     // Bail out if the source is null.
2947     __ testl(src, src);
2948     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2949   }
2950 
2951   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2952     // Bail out if the destination is null.
2953     __ testl(dest, dest);
2954     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2955   }
2956 
2957   Location temp3_loc = locations->GetTemp(2);
2958   Register temp3 = temp3_loc.AsRegister<Register>();
2959   if (length.IsStackSlot()) {
2960     __ movl(temp3, Address(ESP, length.GetStackIndex()));
2961     length = Location::RegisterLocation(temp3);
2962   }
2963 
2964   // If the length is negative, bail out.
2965   // We have already checked in the LocationsBuilder for the constant case.
2966   if (!length.IsConstant() &&
2967       !optimizations.GetCountIsSourceLength() &&
2968       !optimizations.GetCountIsDestinationLength()) {
2969     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2970     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2971   }
2972 
2973   // Validity checks: source.
2974   CheckPosition(assembler,
2975                 src_pos,
2976                 src,
2977                 length,
2978                 intrinsic_slow_path,
2979                 temp1,
2980                 optimizations.GetCountIsSourceLength());
2981 
2982   // Validity checks: dest.
2983   CheckPosition(assembler,
2984                 dest_pos,
2985                 dest,
2986                 length,
2987                 intrinsic_slow_path,
2988                 temp1,
2989                 optimizations.GetCountIsDestinationLength());
2990 
2991   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2992     // Check whether all elements of the source array are assignable to the component
2993     // type of the destination array. We do two checks: the classes are the same,
2994     // or the destination is Object[]. If none of these checks succeed, we go to the
2995     // slow path.
2996 
2997     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2998       if (gUseReadBarrier && kUseBakerReadBarrier) {
2999         // /* HeapReference<Class> */ temp1 = src->klass_
3000         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3001             invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
3002         // Bail out if the source is not a non primitive array.
3003         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3004         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3005             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3006         __ testl(temp1, temp1);
3007         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3008         // If heap poisoning is enabled, `temp1` has been unpoisoned
3009         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3010       } else {
3011         // /* HeapReference<Class> */ temp1 = src->klass_
3012         __ movl(temp1, Address(src, class_offset));
3013         __ MaybeUnpoisonHeapReference(temp1);
3014         // Bail out if the source is not a non primitive array.
3015         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3016         __ movl(temp1, Address(temp1, component_offset));
3017         __ testl(temp1, temp1);
3018         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3019         __ MaybeUnpoisonHeapReference(temp1);
3020       }
3021       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3022       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3023     }
3024 
3025     if (gUseReadBarrier && kUseBakerReadBarrier) {
3026       if (length.Equals(Location::RegisterLocation(temp3))) {
3027         // When Baker read barriers are enabled, register `temp3`,
3028         // which in the present case contains the `length` parameter,
3029         // will be overwritten below.  Make the `length` location
3030         // reference the original stack location; it will be moved
3031         // back to `temp3` later if necessary.
3032         DCHECK(length_arg.IsStackSlot());
3033         length = length_arg;
3034       }
3035 
3036       // /* HeapReference<Class> */ temp1 = dest->klass_
3037       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3038           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3039 
3040       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3041         // Bail out if the destination is not a non primitive array.
3042         //
3043         // Register `temp1` is not trashed by the read barrier emitted
3044         // by GenerateFieldLoadWithBakerReadBarrier below, as that
3045         // method produces a call to a ReadBarrierMarkRegX entry point,
3046         // which saves all potentially live registers, including
3047         // temporaries such as `temp1`.
3048         // /* HeapReference<Class> */ temp2 = temp1->component_type_
3049         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3050             invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
3051         __ testl(temp2, temp2);
3052         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3053         // If heap poisoning is enabled, `temp2` has been unpoisoned
3054         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3055         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3056         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3057       }
3058 
3059       // For the same reason given earlier, `temp1` is not trashed by the
3060       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3061       // /* HeapReference<Class> */ temp2 = src->klass_
3062       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3063           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3064       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3065       __ cmpl(temp1, temp2);
3066 
3067       if (optimizations.GetDestinationIsTypedObjectArray()) {
3068         NearLabel do_copy;
3069         __ j(kEqual, &do_copy);
3070         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3071         codegen_->GenerateFieldLoadWithBakerReadBarrier(
3072             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3073         // We do not need to emit a read barrier for the following
3074         // heap reference load, as `temp1` is only used in a
3075         // comparison with null below, and this reference is not
3076         // kept afterwards.
3077         __ cmpl(Address(temp1, super_offset), Immediate(0));
3078         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3079         __ Bind(&do_copy);
3080       } else {
3081         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3082       }
3083     } else {
3084       // Non read barrier code.
3085 
3086       // /* HeapReference<Class> */ temp1 = dest->klass_
3087       __ movl(temp1, Address(dest, class_offset));
3088       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3089         __ MaybeUnpoisonHeapReference(temp1);
3090         // Bail out if the destination is not a non primitive array.
3091         // /* HeapReference<Class> */ temp2 = temp1->component_type_
3092         __ movl(temp2, Address(temp1, component_offset));
3093         __ testl(temp2, temp2);
3094         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3095         __ MaybeUnpoisonHeapReference(temp2);
3096         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3097         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3098         // Re-poison the heap reference to make the compare instruction below
3099         // compare two poisoned references.
3100         __ PoisonHeapReference(temp1);
3101       }
3102 
3103       // Note: if heap poisoning is on, we are comparing two poisoned references here.
3104       __ cmpl(temp1, Address(src, class_offset));
3105 
3106       if (optimizations.GetDestinationIsTypedObjectArray()) {
3107         NearLabel do_copy;
3108         __ j(kEqual, &do_copy);
3109         __ MaybeUnpoisonHeapReference(temp1);
3110         // /* HeapReference<Class> */ temp1 = temp1->component_type_
3111         __ movl(temp1, Address(temp1, component_offset));
3112         __ MaybeUnpoisonHeapReference(temp1);
3113         __ cmpl(Address(temp1, super_offset), Immediate(0));
3114         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3115         __ Bind(&do_copy);
3116       } else {
3117         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3118       }
3119     }
3120   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3121     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3122     // Bail out if the source is not a non primitive array.
3123     if (gUseReadBarrier && kUseBakerReadBarrier) {
3124       // /* HeapReference<Class> */ temp1 = src->klass_
3125       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3126           invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
3127       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3128       codegen_->GenerateFieldLoadWithBakerReadBarrier(
3129           invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3130       __ testl(temp1, temp1);
3131       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3132       // If heap poisoning is enabled, `temp1` has been unpoisoned
3133       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3134     } else {
3135       // /* HeapReference<Class> */ temp1 = src->klass_
3136       __ movl(temp1, Address(src, class_offset));
3137       __ MaybeUnpoisonHeapReference(temp1);
3138       // /* HeapReference<Class> */ temp1 = temp1->component_type_
3139       __ movl(temp1, Address(temp1, component_offset));
3140       __ testl(temp1, temp1);
3141       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3142       __ MaybeUnpoisonHeapReference(temp1);
3143     }
3144     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3145     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3146   }
3147 
3148   const DataType::Type type = DataType::Type::kReference;
3149   const int32_t element_size = DataType::Size(type);
3150 
3151   // Compute the base source address in `temp1`.
3152   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
3153 
3154   if (gUseReadBarrier && kUseBakerReadBarrier) {
3155     // If it is needed (in the case of the fast-path loop), the base
3156     // destination address is computed later, as `temp2` is used for
3157     // intermediate computations.
3158 
3159     // Compute the end source address in `temp3`.
3160     if (length.IsStackSlot()) {
3161       // Location `length` is again pointing at a stack slot, as
3162       // register `temp3` (which contained the length parameter
3163       // earlier) has been overwritten; restore it now.
3164       DCHECK(length.Equals(length_arg));
3165       __ movl(temp3, Address(ESP, length.GetStackIndex()));
3166       length = Location::RegisterLocation(temp3);
3167     }
3168     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3169 
3170     // SystemArrayCopy implementation for Baker read barriers (see
3171     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3172     //
3173     //   if (src_ptr != end_ptr) {
3174     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
3175     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
3176     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
3177     //     if (is_gray) {
3178     //       // Slow-path copy.
3179     //       for (size_t i = 0; i != length; ++i) {
3180     //         dest_array[dest_pos + i] =
3181     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3182     //       }
3183     //     } else {
3184     //       // Fast-path copy.
3185     //       do {
3186     //         *dest_ptr++ = *src_ptr++;
3187     //       } while (src_ptr != end_ptr)
3188     //     }
3189     //   }
3190 
3191     NearLabel loop, done;
3192 
3193     // Don't enter copy loop if `length == 0`.
3194     __ cmpl(temp1, temp3);
3195     __ j(kEqual, &done);
3196 
3197     // Given the numeric representation, it's enough to check the low bit of the rb_state.
3198     static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3199     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3200     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3201     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3202     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3203 
3204     // if (rb_state == ReadBarrier::GrayState())
3205     //   goto slow_path;
3206     // At this point, just do the "if" and make sure that flags are preserved until the branch.
3207     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3208 
3209     // Load fence to prevent load-load reordering.
3210     // Note that this is a no-op, thanks to the x86 memory model.
3211     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3212 
3213     // Slow path used to copy array when `src` is gray.
3214     SlowPathCode* read_barrier_slow_path =
3215         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3216     codegen_->AddSlowPath(read_barrier_slow_path);
3217 
3218     // We have done the "if" of the gray bit check above, now branch based on the flags.
3219     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3220 
3221     // Fast-path copy.
3222     // Compute the base destination address in `temp2`.
3223     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3224     // Iterate over the arrays and do a raw copy of the objects. We don't need to
3225     // poison/unpoison.
3226     __ Bind(&loop);
3227     __ pushl(Address(temp1, 0));
3228     __ cfi().AdjustCFAOffset(4);
3229     __ popl(Address(temp2, 0));
3230     __ cfi().AdjustCFAOffset(-4);
3231     __ addl(temp1, Immediate(element_size));
3232     __ addl(temp2, Immediate(element_size));
3233     __ cmpl(temp1, temp3);
3234     __ j(kNotEqual, &loop);
3235 
3236     __ Bind(read_barrier_slow_path->GetExitLabel());
3237     __ Bind(&done);
3238   } else {
3239     // Non read barrier code.
3240     // Compute the base destination address in `temp2`.
3241     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3242     // Compute the end source address in `temp3`.
3243     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3244     // Iterate over the arrays and do a raw copy of the objects. We don't need to
3245     // poison/unpoison.
3246     NearLabel loop, done;
3247     __ cmpl(temp1, temp3);
3248     __ j(kEqual, &done);
3249     __ Bind(&loop);
3250     __ pushl(Address(temp1, 0));
3251     __ cfi().AdjustCFAOffset(4);
3252     __ popl(Address(temp2, 0));
3253     __ cfi().AdjustCFAOffset(-4);
3254     __ addl(temp1, Immediate(element_size));
3255     __ addl(temp2, Immediate(element_size));
3256     __ cmpl(temp1, temp3);
3257     __ j(kNotEqual, &loop);
3258     __ Bind(&done);
3259   }
3260 
3261   // We only need one card marking on the destination array.
3262   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false);
3263 
3264   __ Bind(intrinsic_slow_path->GetExitLabel());
3265 }
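
// Illustrative sketch (not part of the generated code): the fast-path loop
// emitted above is a plain forward word-by-word copy between the two computed
// addresses; the PUSH/POP pair merely stands in for a memory-to-memory move,
// which x86 does not have.
static constexpr bool CopyLoopModelForIllustration() {
  uint32_t src[4] = {1u, 2u, 3u, 4u};
  uint32_t dest[4] = {0u, 0u, 0u, 0u};
  const uint32_t* src_ptr = src;
  const uint32_t* end_ptr = src + 4;
  uint32_t* dest_ptr = dest;
  while (src_ptr != end_ptr) {
    *dest_ptr++ = *src_ptr++;
  }
  return dest[0] == 1u && dest[1] == 2u && dest[2] == 3u && dest[3] == 4u;
}
static_assert(CopyLoopModelForIllustration(), "the forward copy reproduces every source element");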
3266 
3267 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3268   LocationSummary* locations = invoke->GetLocations();
3269   if (locations != nullptr) {
3270     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3271     // Note: The base method address is not present yet when this is called from the
3272     // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3273     if (invoke_static_or_direct->HasSpecialInput()) {
3274       DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3275                  ->IsX86ComputeBaseMethodAddress());
3276       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3277                          Location::RequiresRegister());
3278     }
3279   }
3280 }
3281 
3282 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3283   DCHECK(invoke->IsInvokeStaticOrDirect());
3284   InvokeRuntimeCallingConvention calling_convention;
3285   IntrinsicVisitor::ComputeIntegerValueOfLocations(
3286       invoke,
3287       codegen_,
3288       Location::RegisterLocation(EAX),
3289       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3290   RequestBaseMethodAddressInRegister(invoke);
3291 }
3292 
3293 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3294   DCHECK(invoke->IsInvokeStaticOrDirect());
3295   IntrinsicVisitor::IntegerValueOfInfo info =
3296       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3297   LocationSummary* locations = invoke->GetLocations();
3298   X86Assembler* assembler = GetAssembler();
3299 
3300   Register out = locations->Out().AsRegister<Register>();
3301   auto allocate_instance = [&]() {
3302     DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3303     codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3304     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3305     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3306   };
3307   if (invoke->InputAt(0)->IsConstant()) {
3308     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3309     if (static_cast<uint32_t>(value - info.low) < info.length) {
3310       // Just embed the j.l.Integer in the code.
3311       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3312       codegen_->LoadBootImageAddress(
3313           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3314     } else {
3315       DCHECK(locations->CanCall());
3316       // Allocate and initialize a new j.l.Integer.
3317       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3318       // JIT object table.
3319       allocate_instance();
3320       __ movl(Address(out, info.value_offset), Immediate(value));
3321     }
3322   } else {
3323     DCHECK(locations->CanCall());
3324     Register in = locations->InAt(0).AsRegister<Register>();
3325     // Check bounds of our cache.
3326     __ leal(out, Address(in, -info.low));
3327     __ cmpl(out, Immediate(info.length));
3328     NearLabel allocate, done;
3329     __ j(kAboveEqual, &allocate);
3330     // If the value is within the bounds, load the j.l.Integer directly from the array.
3331     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3332     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3333                   "Check heap reference size.");
3334     if (codegen_->GetCompilerOptions().IsBootImage()) {
3335       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3336       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3337       HX86ComputeBaseMethodAddress* method_address =
3338           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3339       DCHECK(method_address != nullptr);
3340       Register method_address_reg =
3341           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3342       __ movl(out,
3343               Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3344       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3345     } else {
3346       // Note: We're about to clobber the index in `out`, so we need to use `in` and
3347       // adjust the offset accordingly.
3348       uint32_t mid_array_boot_image_offset =
3349               info.array_data_boot_image_reference - info.low * kElementSize;
3350       codegen_->LoadBootImageAddress(
3351           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3352       DCHECK_NE(out, in);
3353       __ movl(out, Address(out, in, TIMES_4, 0));
3354     }
3355     __ MaybeUnpoisonHeapReference(out);
3356     __ jmp(&done);
3357     __ Bind(&allocate);
3358     // Otherwise allocate and initialize a new j.l.Integer.
3359     allocate_instance();
3360     __ movl(Address(out, info.value_offset), in);
3361     __ Bind(&done);
3362   }
3363 }
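
// Illustrative sketch (not part of the generated code): the generated path
// implements the usual Integer.valueOf() cache check. Subtracting `info.low`
// first turns the two-sided range test into a single unsigned comparison,
// exactly like the LEAL/CMPL pair above. The helper and the [-128, 127] range
// below are an assumed model of the default cache, not values read from the
// boot image.
static constexpr bool IsInIntegerCacheForIllustration(int32_t value, int32_t low, uint32_t length) {
  return static_cast<uint32_t>(value - low) < length;
}
static_assert(IsInIntegerCacheForIllustration(-128, -128, 256u), "the low bound is cached");
static_assert(IsInIntegerCacheForIllustration(127, -128, 256u), "the high bound is cached");
static_assert(!IsInIntegerCacheForIllustration(128, -128, 256u), "128 falls back to allocation");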
3364 
3365 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3366   IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3367   RequestBaseMethodAddressInRegister(invoke);
3368 }
3369 
3370 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3371   X86Assembler* assembler = GetAssembler();
3372   LocationSummary* locations = invoke->GetLocations();
3373 
3374   Location obj = locations->InAt(0);
3375   Location out = locations->Out();
3376 
3377   SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3378   codegen_->AddSlowPath(slow_path);
3379 
3380   if (gUseReadBarrier) {
3381     // Check self->GetWeakRefAccessEnabled().
3382     ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3383     __ fs()->cmpl(Address::Absolute(offset),
3384                   Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3385     __ j(kNotEqual, slow_path->GetEntryLabel());
3386   }
3387 
3388   // Load the java.lang.ref.Reference class, use the output register as a temporary.
3389   codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3390                                         invoke->AsInvokeStaticOrDirect());
3391 
3392   // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3393   MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3394   DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3395   DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3396             IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3397   __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3398           Immediate(0));
3399   __ j(kNotEqual, slow_path->GetEntryLabel());
3400 
3401   // Load the value from the field.
3402   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3403   if (gUseReadBarrier && kUseBakerReadBarrier) {
3404     codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3405                                                     out,
3406                                                     obj.AsRegister<Register>(),
3407                                                     referent_offset,
3408                                                     /*needs_null_check=*/ true);
3409     // Note that the fence is a no-op, thanks to the x86 memory model.
3410     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3411   } else {
3412     __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3413     codegen_->MaybeRecordImplicitNullCheck(invoke);
3414     // Note that the fence is a no-op, thanks to the x86 memory model.
3415     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3416     codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3417   }
3418   __ Bind(slow_path->GetExitLabel());
3419 }
3420 
3421 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3422   IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3423 }
3424 
3425 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3426   X86Assembler* assembler = GetAssembler();
3427   LocationSummary* locations = invoke->GetLocations();
3428 
3429   Register obj = locations->InAt(0).AsRegister<Register>();
3430   Register other = locations->InAt(1).AsRegister<Register>();
3431   Register out = locations->Out().AsRegister<Register>();
3432 
3433   uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3434   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3435 
3436   __ movl(out, Address(obj, referent_offset));
3437   codegen_->MaybeRecordImplicitNullCheck(invoke);
3438   __ MaybeUnpoisonHeapReference(out);
3439   // Note that the fence is a no-op, thanks to the x86 memory model.
3440   codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
3441 
3442   NearLabel end, return_true, return_false;
3443   __ cmpl(out, other);
3444 
3445   if (gUseReadBarrier) {
3446     DCHECK(kUseBakerReadBarrier);
3447 
3448     __ j(kEqual, &return_true);
3449 
3450     // Check if the loaded reference is null.
3451     __ testl(out, out);
3452     __ j(kZero, &return_false);
3453 
3454     // For correct memory visibility, we need a barrier before loading the lock word
3455     // but we already have the barrier emitted for volatile load above which is sufficient.
3456 
3457     // Load the lockword and check if it is a forwarding address.
3458     static_assert(LockWord::kStateShift == 30u);
3459     static_assert(LockWord::kStateForwardingAddress == 3u);
3460     __ movl(out, Address(out, monitor_offset));
3461     __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3462     __ j(kBelow, &return_false);
3463 
3464     // Extract the forwarding address and compare with `other`.
3465     __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3466     __ cmpl(out, other);
3467   }
3468 
3469   __ j(kNotEqual, &return_false);
3470 
3471   // Return true and exit the function.
3472   __ Bind(&return_true);
3473   __ movl(out, Immediate(1));
3474   __ jmp(&end);
3475 
3476   // Return false and exit the function.
3477   __ Bind(&return_false);
3478   __ xorl(out, out);
3479   __ Bind(&end);
3480 }
3481 
3482 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3483   LocationSummary* locations =
3484       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3485   locations->SetOut(Location::RequiresRegister());
3486 }
3487 
3488 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3489   X86Assembler* assembler = GetAssembler();
3490   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3491   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3492   NearLabel done;
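  // Load the thread-local `interrupted` flag (addressed through the fs segment); if it is set,
  // clear it and emit a fence. This matches Thread.interrupted(), which returns the interrupt
  // status and clears it.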
3493   __ fs()->movl(out, address);
3494   __ testl(out, out);
3495   __ j(kEqual, &done);
3496   __ fs()->movl(address, Immediate(0));
3497   codegen_->MemoryFence();
3498   __ Bind(&done);
3499 }
3500 
3501 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3502   LocationSummary* locations =
3503       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3504   locations->SetInAt(0, Location::Any());
3505 }
3506 
3507 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3508 
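// Integer.divideUnsigned(int, int): generated below as an unsigned `div`; a zero divisor bails
// out to the managed implementation so that the ArithmeticException behavior is preserved.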
3509 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3510   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3511                                                                 LocationSummary::kCallOnSlowPath,
3512                                                                 kIntrinsified);
3513   locations->SetInAt(0, Location::RegisterLocation(EAX));
3514   locations->SetInAt(1, Location::RequiresRegister());
3515   locations->SetOut(Location::SameAsFirstInput());
3516   // The x86 div instruction uses edx:eax as the dividend.
3517   locations->AddTemp(Location::RegisterLocation(EDX));
3518 }
3519 
3520 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3521   X86Assembler* assembler = GetAssembler();
3522   LocationSummary* locations = invoke->GetLocations();
3523   Location out = locations->Out();
3524   Location first = locations->InAt(0);
3525   Location second = locations->InAt(1);
3526   Register edx = locations->GetTemp(0).AsRegister<Register>();
3527   Register second_reg = second.AsRegister<Register>();
3528 
3529   DCHECK_EQ(EAX, first.AsRegister<Register>());
3530   DCHECK_EQ(EAX, out.AsRegister<Register>());
3531   DCHECK_EQ(EDX, edx);
3532 
3533   // If the divisor is zero, bail out to the managed implementation to handle it.
3534   __ testl(second_reg, second_reg);
3535   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3536   codegen_->AddSlowPath(slow_path);
3537   __ j(kEqual, slow_path->GetEntryLabel());
3538 
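  // Unsigned division: `div` uses edx:eax as the dividend, so zero edx to extend the 32-bit
  // dividend in eax. The quotient ends up in eax (the output) and the remainder in edx.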
3539   __ xorl(edx, edx);
3540   __ divl(second_reg);
3541 
3542   __ Bind(slow_path->GetExitLabel());
3543 }
3544 
3545 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3546   VarHandleOptimizations optimizations(invoke);
3547   if (optimizations.GetDoNotIntrinsify()) {
3548     return false;
3549   }
3550 
3551   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3552   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
3553   if (expected_coordinates_count > 1u) {
3554     // Only static and instance field VarHandles are supported for now.
3555     // TODO: add support for arrays and views.
3556     return false;
3557   }
3558 
3559   return true;
3560 }
3561 
3562 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3563                                              mirror::VarHandle::AccessMode access_mode,
3564                                              SlowPathCode* slow_path,
3565                                              X86Assembler* assembler) {
3566   const uint32_t access_modes_bitmask_offset =
3567       mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3568   const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3569 
3570   // If the access mode is not supported, bail out to the runtime implementation to handle it.
3571   __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3572   __ j(kZero, slow_path->GetEntryLabel());
3573 }
3574 
3575 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3576                                               SlowPathCode* slow_path,
3577                                               X86Assembler* assembler) {
3578   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3579 
3580   // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3581   // Do not emit read barrier (or unpoison the reference) for comparing to null.
3582   __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3583   __ j(kNotEqual, slow_path->GetEntryLabel());
3584 }
3585 
3586 static void GenerateSubTypeObjectCheck(Register object,
3587                                        Register temp,
3588                                        Address type_address,
3589                                        SlowPathCode* slow_path,
3590                                        X86Assembler* assembler,
3591                                        bool object_can_be_null = true) {
3592   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3593   const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3594   NearLabel check_type_compatibility, type_matched;
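  // The loop below walks the superclass chain: start at the object's class and compare each
  // class against the one at `type_address`, following the super class pointer until a match
  // or until the chain ends in null (in which case we defer to the slow path).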
3595 
3596   // If the object is null, there is no need to check the type
3597   if (object_can_be_null) {
3598     __ testl(object, object);
3599     __ j(kZero, &type_matched);
3600   }
3601 
3602   // Do not unpoison for in-memory comparison.
3603   // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3604   __ movl(temp, Address(object, class_offset));
3605   __ Bind(&check_type_compatibility);
3606   __ cmpl(temp, type_address);
3607   __ j(kEqual, &type_matched);
3608   // Load the super class.
3609   __ MaybeUnpoisonHeapReference(temp);
3610   __ movl(temp, Address(temp, super_class_offset));
3611   // If the super class is null, we reached the root of the hierarchy without a match.
3612   // We let the slow path handle uncovered cases (e.g. interfaces).
3613   __ testl(temp, temp);
3614   __ j(kEqual, slow_path->GetEntryLabel());
3615   __ jmp(&check_type_compatibility);
3616   __ Bind(&type_matched);
3617 }
3618 
3619 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3620                                                  Register temp,
3621                                                  SlowPathCode* slow_path,
3622                                                  X86Assembler* assembler) {
3623   VarHandleOptimizations optimizations(invoke);
3624   LocationSummary* locations = invoke->GetLocations();
3625   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3626   Register object = locations->InAt(1).AsRegister<Register>();
3627 
3628   const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3629   const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3630 
3631   // Check that the VarHandle references an instance field by checking that
3632   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3633   // type compatibility check with the source object's type, which will fail for null.
3634   __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3635   __ j(kNotEqual, slow_path->GetEntryLabel());
3636 
3637   // Check if the object is null
3638   if (!optimizations.GetSkipObjectNullCheck()) {
3639     __ testl(object, object);
3640     __ j(kZero, slow_path->GetEntryLabel());
3641   }
3642 
3643   // Check the object's class against coordinateType0.
3644   GenerateSubTypeObjectCheck(object,
3645                              temp,
3646                              Address(varhandle_object, coordtype0_offset),
3647                              slow_path,
3648                              assembler,
3649                              /* object_can_be_null= */ false);
3650 }
3651 
3652 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3653                                               Register temp,
3654                                               DataType::Type type,
3655                                               SlowPathCode* slow_path,
3656                                               X86Assembler* assembler) {
3657   const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3658   const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3659   const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3660 
3661   // We do not need a read barrier when loading a reference only in order to read a constant
3662   // field through that reference.
3663   __ movl(temp, Address(varhandle_object, var_type_offset));
3664   __ MaybeUnpoisonHeapReference(temp);
3665   __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3666   __ j(kNotEqual, slow_path->GetEntryLabel());
3667 }
3668 
3669 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3670                                           Register temp,
3671                                           SlowPathCode* slow_path,
3672                                           X86Assembler* assembler) {
3673   LocationSummary* locations = invoke->GetLocations();
3674   Register vh_object = locations->InAt(0).AsRegister<Register>();
3675   mirror::VarHandle::AccessMode access_mode =
3676       mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3677 
3678   GenerateVarHandleAccessModeCheck(vh_object,
3679                                    access_mode,
3680                                    slow_path,
3681                                    assembler);
3682 
3683   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3684   switch (expected_coordinates_count) {
3685     case 0u:
3686       GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3687       break;
3688     case 1u: {
3689       GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
3690       break;
3691     }
3692     default:
3693       // Unimplemented
3694       UNREACHABLE();
3695   }
3696 
3697   // Check the return type and varType parameters.
3698   mirror::VarHandle::AccessModeTemplate access_mode_template =
3699       mirror::VarHandle::GetAccessModeTemplate(access_mode);
3700   DataType::Type type = invoke->GetType();
3701 
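  // Depending on the access mode template, different operands must match the varType:
  // kGet checks the return type; kSet/kGetAndUpdate check the last (value) argument;
  // kCompareAndSet/kCompareAndExchange check both the expected and the new value. Reference
  // values additionally get a subtype check against the varType.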
3702   switch (access_mode_template) {
3703     case mirror::VarHandle::AccessModeTemplate::kGet:
3704       // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3705       // are also checked later by a HCheckCast node as an additional check.
3706       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3707       break;
3708     case mirror::VarHandle::AccessModeTemplate::kSet:
3709     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3710       uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3711       DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3712 
3713       // Check the varType.primitiveType against the type of the value we're trying to set.
3714       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3715       if (value_type == DataType::Type::kReference) {
3716         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3717 
3718         // If the value type is a reference, check it against the varType.
3719         GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3720                                    temp,
3721                                    Address(vh_object, var_type_offset),
3722                                    slow_path,
3723                                    assembler);
3724       }
3725       break;
3726     }
3727     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3728     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3729       uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3730       uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3731       DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3732       DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3733 
3734       // Check the varType.primitiveType against the type of the expected value.
3735       GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3736       if (value_type == DataType::Type::kReference) {
3737         const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3738 
3739         // If the value type is a reference, check both the expected and the new value against
3740         // the varType.
3741         GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3742                                    temp,
3743                                    Address(vh_object, var_type_offset),
3744                                    slow_path,
3745                                    assembler);
3746         GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3747                                    temp,
3748                                    Address(vh_object, var_type_offset),
3749                                    slow_path,
3750                                    assembler);
3751       }
3752       break;
3753     }
3754   }
3755 }
3756 
3757 // This method resolves the field referred to by a field VarHandle (base + offset).
3758 // The return value is the register containing the object's reference (for an instance field)
3759 // or the declaring class (for a static field); the declaring class is stored in the `temp`
3760 // register. The field's offset is loaded into the `offset` register.
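// A sketch of how the generators below use this helper:
//   Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
//   Address field_addr(ref, offset, TIMES_1, 0);  // The field lives at `ref + offset`.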
3761 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3762                                                 CodeGeneratorX86* codegen,
3763                                                 Register temp,
3764                                                 /*out*/ Register offset) {
3765   X86Assembler* assembler = codegen->GetAssembler();
3766   LocationSummary* locations = invoke->GetLocations();
3767   const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3768   const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3769   const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3770   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3771 
3772   // Load the ArtField and the offset
3773   __ movl(temp, Address(varhandle_object, artfield_offset));
3774   __ movl(offset, Address(temp, offset_offset));
3775   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3776   if (expected_coordinates_count == 0) {
3777     // For static fields, load the declaring class
3778     InstructionCodeGeneratorX86* instr_codegen =
3779         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3780     instr_codegen->GenerateGcRootFieldLoad(invoke,
3781                                            Location::RegisterLocation(temp),
3782                                            Address(temp, declaring_class_offset),
3783                                            /* fixup_label= */ nullptr,
3784                                            gCompilerReadBarrierOption);
3785     return temp;
3786   }
3787 
3788   // For instance fields, return the register containing the object.
3789   DCHECK_EQ(expected_coordinates_count, 1u);
3790 
3791   return locations->InAt(1).AsRegister<Register>();
3792 }
3793 
3794 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3795   // The only read barrier implementation supporting the
3796   // VarHandleGet intrinsics is the Baker-style read barrier.
3797   if (gUseReadBarrier && !kUseBakerReadBarrier) {
3798     return;
3799   }
3800 
3801   if (!HasVarHandleIntrinsicImplementation(invoke)) {
3802     return;
3803   }
3804 
3805   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3806   LocationSummary* locations = new (allocator) LocationSummary(
3807       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3808   locations->SetInAt(0, Location::RequiresRegister());
3809   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3810   if (expected_coordinates_count == 1u) {
3811     // For instance fields, this is the source object.
3812     locations->SetInAt(1, Location::RequiresRegister());
3813   }
3814   locations->AddTemp(Location::RequiresRegister());
3815 
3816   DataType::Type type = invoke->GetType();
3817   switch (DataType::Kind(type)) {
3818     case DataType::Type::kInt64:
3819       locations->AddTemp(Location::RequiresRegister());
3820       if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3821         // We need an XmmRegister for Int64 to ensure an atomic load
3822         locations->AddTemp(Location::RequiresFpuRegister());
3823       }
3824       FALLTHROUGH_INTENDED;
3825     case DataType::Type::kInt32:
3826     case DataType::Type::kReference:
3827       locations->SetOut(Location::RequiresRegister());
3828       break;
3829     default:
3830       DCHECK(DataType::IsFloatingPointType(type));
3831       locations->AddTemp(Location::RequiresRegister());
3832       locations->SetOut(Location::RequiresFpuRegister());
3833   }
3834 }
3835 
3836 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3837   // The only read barrier implementation supporting the
3838   // VarHandleGet intrinsics is the Baker-style read barrier.
3839   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
3840 
3841   X86Assembler* assembler = codegen->GetAssembler();
3842   LocationSummary* locations = invoke->GetLocations();
3843   DataType::Type type = invoke->GetType();
3844   DCHECK_NE(type, DataType::Type::kVoid);
3845   Register temp = locations->GetTemp(0).AsRegister<Register>();
3846   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3847   codegen->AddSlowPath(slow_path);
3848 
3849   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3850 
3851   Location out = locations->Out();
3852   // Use 'out' as a temporary register if it's a core register
3853   Register offset =
3854       out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3855 
3856   // Get the field referred by the VarHandle. The returned register contains the object reference
3857   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3858   // declaring class will be placed in 'temp' register.
3859   Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3860   Address field_addr(ref, offset, TIMES_1, 0);
3861 
3862   // Load the value from the field
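  // Three cases: a reference load through the Baker read barrier, an atomic 64-bit load via an
  // XMM temporary for the volatile/acquire/opaque variants, or an ordinary load otherwise.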
3863   if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
3864     codegen->GenerateReferenceLoadWithBakerReadBarrier(
3865         invoke, out, ref, field_addr, /* needs_null_check= */ false);
3866   } else if (type == DataType::Type::kInt64 &&
3867              invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3868     XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3869     codegen->LoadFromMemoryNoBarrier(
3870         type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
3871   } else {
3872     codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3873   }
3874 
3875   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
3876       invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
3877     // Load fence to prevent load-load reordering.
3878     // Note that this is a no-op, thanks to the x86 memory model.
3879     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3880   }
3881 
3882   __ Bind(slow_path->GetExitLabel());
3883 }
3884 
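// The plain, volatile, acquire and opaque getters below all share GenerateVarHandleGet, which
// handles their differing atomicity and ordering requirements (e.g., the trailing load-any
// barrier for the volatile and acquire variants). Example use (sketch):
//   int v = (int) vh.getVolatile(holder);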
3885 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
3886   CreateVarHandleGetLocations(invoke);
3887 }
3888 
3889 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
3890   GenerateVarHandleGet(invoke, codegen_);
3891 }
3892 
3893 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3894   CreateVarHandleGetLocations(invoke);
3895 }
3896 
3897 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3898   GenerateVarHandleGet(invoke, codegen_);
3899 }
3900 
3901 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3902   CreateVarHandleGetLocations(invoke);
3903 }
3904 
3905 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3906   GenerateVarHandleGet(invoke, codegen_);
3907 }
3908 
3909 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3910   CreateVarHandleGetLocations(invoke);
3911 }
3912 
3913 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3914   GenerateVarHandleGet(invoke, codegen_);
3915 }
3916 
3917 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3918   // The only read barrier implementation supporting the
3919   // VarHandleSet intrinsics is the Baker-style read barrier.
3920   if (gUseReadBarrier && !kUseBakerReadBarrier) {
3921     return;
3922   }
3923 
3924   if (!HasVarHandleIntrinsicImplementation(invoke)) {
3925     return;
3926   }
3927 
3928   // The last argument should be the value we intend to set.
3929   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3930   HInstruction* value = invoke->InputAt(value_index);
3931   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3932   bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
3933   if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
3934     // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
3935     // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
3936     // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
3937     // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3938     return;
3939   }
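  // (Returning without a LocationSummary means the invoke is not intrinsified and is compiled
  // as a regular call instead.)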
3940 
3941   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3942   LocationSummary* locations = new (allocator) LocationSummary(
3943       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3944   locations->SetInAt(0, Location::RequiresRegister());
3945   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3946   if (expected_coordinates_count == 1u) {
3947     // For instance fields, this is the source object
3948     locations->SetInAt(1, Location::RequiresRegister());
3949   }
3950 
3951   switch (value_type) {
3952     case DataType::Type::kBool:
3953     case DataType::Type::kInt8:
3954     case DataType::Type::kUint8:
3955       // Ensure the value is in a byte register
3956       locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
3957       break;
3958     case DataType::Type::kInt16:
3959     case DataType::Type::kUint16:
3960     case DataType::Type::kInt32:
3961       locations->SetInAt(value_index, Location::RegisterOrConstant(value));
3962       break;
3963     case DataType::Type::kInt64:
3964       // We only handle constant non-atomic int64 values.
3965       DCHECK(value->IsConstant());
3966       locations->SetInAt(value_index, Location::ConstantLocation(value));
3967       break;
3968     case DataType::Type::kReference:
3969       locations->SetInAt(value_index, Location::RequiresRegister());
3970       break;
3971     default:
3972       DCHECK(DataType::IsFloatingPointType(value_type));
3973       if (needs_atomicity && value_type == DataType::Type::kFloat64) {
3974         locations->SetInAt(value_index, Location::RequiresFpuRegister());
3975       } else {
3976         locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
3977       }
3978   }
3979 
3980   locations->AddTemp(Location::RequiresRegister());
3981   // This temporary register is also used for the card in MarkGCCard, so it must be a byte register.
3982   locations->AddTemp(Location::RegisterLocation(EAX));
3983   if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
3984     // For static reference fields, we need another temporary for the declaring class. We set it
3985     // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
3986     locations->AddTemp(Location::RequiresRegister());
3987   }
3988 }
3989 
3990 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3991   // The only read barrier implementation supporting the
3992   // VarHandleSet intrinsics is the Baker-style read barrier.
3993   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
3994 
3995   X86Assembler* assembler = codegen->GetAssembler();
3996   LocationSummary* locations = invoke->GetLocations();
3997   // The value we want to set is the last argument
3998   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3999   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4000   Register temp = locations->GetTemp(0).AsRegister<Register>();
4001   Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4002   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4003   codegen->AddSlowPath(slow_path);
4004 
4005   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4006 
4007   // For static reference fields, we need another temporary for the declaring class. But since
4008   // for instance fields the object is in a separate register, it is safe to use the first
4009   // temporary register for GenerateVarHandleFieldReference.
4010   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4011   if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4012     temp = locations->GetTemp(2).AsRegister<Register>();
4013   }
4014 
4015   Register offset = temp2;
4016   // Get the field referred by the VarHandle. The returned register contains the object reference
4017   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4018   // declaring class will be placed in 'temp' register.
4019   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4020 
4021   bool is_volatile = false;
4022   switch (invoke->GetIntrinsic()) {
4023     case Intrinsics::kVarHandleSet:
4024     case Intrinsics::kVarHandleSetOpaque:
4025       // The only constraint for setOpaque is bitwise atomicity (atomically setting 64-bit
4026       // values), but we do not handle Int64 values because we would need to place them in a
4027       // register pair. If the slow path is taken, the ParallelMove might fail to move the pair
4028       // in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4029       break;
4030     case Intrinsics::kVarHandleSetRelease:
4031       // setRelease needs to ensure atomicity too. See the above comment.
4032       codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4033       break;
4034     case Intrinsics::kVarHandleSetVolatile:
4035       is_volatile = true;
4036       break;
4037     default:
4038       LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4039   }
4040 
4041   InstructionCodeGeneratorX86* instr_codegen =
4042         down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4043   // Store the value to the field
4044   instr_codegen->HandleFieldSet(
4045       invoke,
4046       value_index,
4047       value_type,
4048       Address(reference, offset, TIMES_1, 0),
4049       reference,
4050       is_volatile,
4051       /* value_can_be_null */ true,
4052       // Value can be null, and this write barrier is not being relied on for other sets.
4053       WriteBarrierKind::kEmitWithNullCheck);
4054 
4055   __ Bind(slow_path->GetExitLabel());
4056 }
4057 
4058 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4059   CreateVarHandleSetLocations(invoke);
4060 }
4061 
4062 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4063   GenerateVarHandleSet(invoke, codegen_);
4064 }
4065 
4066 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4067   CreateVarHandleSetLocations(invoke);
4068 }
4069 
4070 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4071   GenerateVarHandleSet(invoke, codegen_);
4072 }
4073 
4074 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4075   CreateVarHandleSetLocations(invoke);
4076 }
4077 
4078 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4079   GenerateVarHandleSet(invoke, codegen_);
4080 }
4081 
4082 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4083   CreateVarHandleSetLocations(invoke);
4084 }
4085 
4086 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4087   GenerateVarHandleSet(invoke, codegen_);
4088 }
4089 
4090 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
4091   // The only read barrier implementation supporting the
4092   // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
4093   if (gUseReadBarrier && !kUseBakerReadBarrier) {
4094     return;
4095   }
4096 
4097   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4098     return;
4099   }
4100 
4101   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4102   uint32_t value_index = number_of_arguments - 1;
4103   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4104 
4105   if (DataType::Is64BitType(value_type)) {
4106     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4107     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4108     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4109     // <EAX, EBX> to <EBX, ECX>).
4110     return;
4111   }
4112 
4113   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4114   LocationSummary* locations = new (allocator) LocationSummary(
4115       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4116   locations->AddTemp(Location::RequiresRegister());
4117   locations->AddTemp(Location::RequiresRegister());
4118   // We use this temporary for the card, so we need a byte register
4119   locations->AddTemp(Location::RegisterLocation(EBX));
4120   locations->SetInAt(0, Location::RequiresRegister());
4121   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4122     // For instance fields, this is the source object
4123     locations->SetInAt(1, Location::RequiresRegister());
4124   } else {
4125     // For static fields, we need another temp because one will be busy with the declaring class.
4126     locations->AddTemp(Location::RequiresRegister());
4127   }
4128   if (value_type == DataType::Type::kFloat32) {
4129     locations->AddTemp(Location::RegisterLocation(EAX));
4130     locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4131     locations->SetOut(Location::RequiresFpuRegister());
4132   } else {
4133     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4134     locations->SetOut(Location::RegisterLocation(EAX));
4135   }
4136 }
4137 
4138 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4139   // The only read barrier implementation supporting the
4140   // VarHandleGetAndSet intrinsics is the Baker-style read barrier.
4141   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4142 
4143   X86Assembler* assembler = codegen->GetAssembler();
4144   LocationSummary* locations = invoke->GetLocations();
4145   // The value we want to set is the last argument
4146   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4147   Location value = locations->InAt(value_index);
4148   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4149   Register temp = locations->GetTemp(1).AsRegister<Register>();
4150   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4151   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4152   codegen->AddSlowPath(slow_path);
4153 
4154   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4155 
4156   Register offset = locations->GetTemp(0).AsRegister<Register>();
4157   // Get the field referred by the VarHandle. The returned register contains the object reference
4158   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4159   // declaring class will be placed in 'temp' register.
4160   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4161   Address field_addr(reference, offset, TIMES_1, 0);
4162 
4163   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4164     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4165   }
4166 
4167   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4168   // For static fields, we need another temporary for the declaring class. But since for instance
4169   // fields the object is in a separate register, it is safe to use the first temporary register.
4170   temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4171   // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
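  // Each case below emits an xchg of the operand's width and then sign- or zero-extends the
  // old value into the 32-bit output register; float and reference values take extra moves
  // (through EAX for floats, through a temp when heap poisoning is enabled for references).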
4172   switch (value_type) {
4173     case DataType::Type::kBool:
4174       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4175       __ movzxb(locations->Out().AsRegister<Register>(),
4176                 locations->Out().AsRegister<ByteRegister>());
4177       break;
4178     case DataType::Type::kInt8:
4179       __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4180       __ movsxb(locations->Out().AsRegister<Register>(),
4181                 locations->Out().AsRegister<ByteRegister>());
4182       break;
4183     case DataType::Type::kUint16:
4184       __ xchgw(value.AsRegister<Register>(), field_addr);
4185       __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4186       break;
4187     case DataType::Type::kInt16:
4188       __ xchgw(value.AsRegister<Register>(), field_addr);
4189       __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4190       break;
4191     case DataType::Type::kInt32:
4192       __ xchgl(value.AsRegister<Register>(), field_addr);
4193       break;
4194     case DataType::Type::kFloat32:
4195       codegen->Move32(Location::RegisterLocation(EAX), value);
4196       __ xchgl(EAX, field_addr);
4197       __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4198       break;
4199     case DataType::Type::kReference: {
4200       if (gUseReadBarrier && kUseBakerReadBarrier) {
4201         // Need to make sure the reference stored in the field is a to-space
4202         // one before attempting the CAS or the CAS could fail incorrectly.
4203         codegen->GenerateReferenceLoadWithBakerReadBarrier(
4204             invoke,
4205             // Unused, used only as a "temporary" within the read barrier.
4206             Location::RegisterLocation(temp),
4207             reference,
4208             field_addr,
4209             /* needs_null_check= */ false,
4210             /* always_update_field= */ true,
4211             &temp2);
4212       }
4213       codegen->MarkGCCard(
4214           temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false);
4215       if (kPoisonHeapReferences) {
4216         __ movl(temp, value.AsRegister<Register>());
4217         __ PoisonHeapReference(temp);
4218         __ xchgl(temp, field_addr);
4219         __ UnpoisonHeapReference(temp);
4220         __ movl(locations->Out().AsRegister<Register>(), temp);
4221       } else {
4222         __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4223       }
4224       break;
4225     }
4226     default:
4227       UNREACHABLE();
4228   }
4229 
4230   if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4231     codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4232   }
4233 
4234   __ Bind(slow_path->GetExitLabel());
4235 }
4236 
4237 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4238   CreateVarHandleGetAndSetLocations(invoke);
4239 }
4240 
4241 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4242   GenerateVarHandleGetAndSet(invoke, codegen_);
4243 }
4244 
4245 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4246   CreateVarHandleGetAndSetLocations(invoke);
4247 }
4248 
4249 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4250   GenerateVarHandleGetAndSet(invoke, codegen_);
4251 }
4252 
4253 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4254   CreateVarHandleGetAndSetLocations(invoke);
4255 }
4256 
4257 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4258   GenerateVarHandleGetAndSet(invoke, codegen_);
4259 }
4260 
4261 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4262   // The only read barrier implementation supporting the
4263   // VarHandle compare-and-set/exchange intrinsics is the Baker-style read barrier.
4264   if (gUseReadBarrier && !kUseBakerReadBarrier) {
4265     return;
4266   }
4267 
4268   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4269     return;
4270   }
4271 
4272   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4273   uint32_t expected_value_index = number_of_arguments - 2;
4274   uint32_t new_value_index = number_of_arguments - 1;
4275   DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4276   DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4277 
4278   if (DataType::Is64BitType(value_type)) {
4279     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4280     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4281     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4282     // <EAX, EBX> to <EBX, ECX>).
4283     return;
4284   }
4285 
4286   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4287   LocationSummary* locations = new (allocator) LocationSummary(
4288       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4289   locations->AddTemp(Location::RequiresRegister());
4290   locations->AddTemp(Location::RequiresRegister());
4291   // We use this temporary for the card, so we need a byte register
4292   locations->AddTemp(Location::RegisterLocation(EBX));
4293   locations->SetInAt(0, Location::RequiresRegister());
4294   if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4295     // For instance fields, this is the source object
4296     locations->SetInAt(1, Location::RequiresRegister());
4297   } else {
4298     // For static fields, we need another temp because one will be busy with the declaring class.
4299     locations->AddTemp(Location::RequiresRegister());
4300   }
4301   if (DataType::IsFloatingPointType(value_type)) {
4302     // We need EAX for placing the expected value
4303     locations->AddTemp(Location::RegisterLocation(EAX));
4304     locations->SetInAt(new_value_index,
4305                        Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4306     locations->SetInAt(expected_value_index,
4307                        Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4308   } else {
4309     // Ensure it's in a byte register
4310     locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4311     locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
4312   }
4313 
4314   mirror::VarHandle::AccessModeTemplate access_mode_template =
4315       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4316 
4317   if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4318       value_type == DataType::Type::kFloat32) {
4319     locations->SetOut(Location::RequiresFpuRegister());
4320   } else {
4321     locations->SetOut(Location::RegisterLocation(EAX));
4322   }
4323 }
4324 
4325 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4326   // The only read barrier implementation supporting the
4327   // VarHandle compare-and-set/exchange intrinsics is the Baker-style read barrier.
4328   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4329 
4330   X86Assembler* assembler = codegen->GetAssembler();
4331   LocationSummary* locations = invoke->GetLocations();
4332   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4333   uint32_t expected_value_index = number_of_arguments - 2;
4334   uint32_t new_value_index = number_of_arguments - 1;
4335   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4336   DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4337   Location expected_value = locations->InAt(expected_value_index);
4338   Location new_value = locations->InAt(new_value_index);
4339   Register offset = locations->GetTemp(0).AsRegister<Register>();
4340   Register temp = locations->GetTemp(1).AsRegister<Register>();
4341   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4342   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4343   codegen->AddSlowPath(slow_path);
4344 
4345   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4346 
4347   // Get the field referred by the VarHandle. The returned register contains the object reference
4348   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4349   // declaring class will be placed in 'temp' register.
4350   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4351 
4352   uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4353   // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4354   // first temporary contains the declaring class so we need another temporary. In case of an
4355   // instance field, the object comes in a separate register so it's safe to use the first temp.
4356   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4357   DCHECK_NE(temp, reference);
4358 
4359   // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4360   // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4361   // barriers at this time.
4362 
4363   mirror::VarHandle::AccessModeTemplate access_mode_template =
4364       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4365   bool is_cmpxchg =
4366       access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
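  // `is_cmpxchg` selects the result shape: the compareAndExchange variants return the witness
  // (old) value, while compareAndSet/weakCompareAndSet return a success flag.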
4367 
4368   if (type == DataType::Type::kReference) {
4369     GenReferenceCAS(
4370         invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4371   } else {
4372     Location out = locations->Out();
4373     GenPrimitiveCAS(
4374         type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4375   }
4376 
4377   __ Bind(slow_path->GetExitLabel());
4378 }
4379 
4380 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4381   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4382 }
4383 
4384 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4385   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4386 }
4387 
4388 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4389   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4390 }
4391 
4392 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4393   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4394 }
4395 
4396 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4397   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4398 }
4399 
4400 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4401   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4402 }
4403 
4404 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4405   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4406 }
4407 
4408 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4409   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4410 }
4411 
4412 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4413   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4414 }
4415 
4416 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4417   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4418 }
4419 
4420 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4421   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4422 }
4423 
4424 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4425   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4426 }
4427 
4428 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4429   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4430 }
4431 
4432 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4433   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4434 }
4435 
4436 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4437   CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4438 }
4439 
4440 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4441   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4442 }
4443 
4444 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4445   // The only read barrier implementation supporting the
4446   // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4447   if (gUseReadBarrier && !kUseBakerReadBarrier) {
4448     return;
4449   }
4450 
4451   if (!HasVarHandleIntrinsicImplementation(invoke)) {
4452     return;
4453   }
4454 
4455   // The last argument should be the value we intend to set.
4456   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4457   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4458   if (DataType::Is64BitType(value_type)) {
4459     // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4460     // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4461     // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4462     // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4463     return;
4464   }
4465 
4466   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4467   LocationSummary* locations = new (allocator) LocationSummary(
4468       invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4469   locations->AddTemp(Location::RequiresRegister());
4470   locations->AddTemp(Location::RequiresRegister());
4471   locations->SetInAt(0, Location::RequiresRegister());
4472   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4473   if (expected_coordinates_count == 1u) {
4474     // For instance fields, this is the source object
4475     locations->SetInAt(1, Location::RequiresRegister());
4476   } else {
4477     // For static fields, we need another temp because one will be busy with the declaring class.
4478     locations->AddTemp(Location::RequiresRegister());
4479   }
4480 
4481   if (DataType::IsFloatingPointType(value_type)) {
4482     locations->AddTemp(Location::RequiresFpuRegister());
4483     locations->AddTemp(Location::RegisterLocation(EAX));
4484     locations->SetInAt(value_index, Location::RequiresFpuRegister());
4485     locations->SetOut(Location::RequiresFpuRegister());
4486   } else {
4487     // xadd updates the register argument with the old value. ByteRegister required for xaddb.
4488     locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4489     locations->SetOut(Location::RegisterLocation(EAX));
4490   }
4491 }
4492 
4493 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4494   // The only read barrier implementation supporting the
4495   // VarHandleGetAndAdd intrinsics is the Baker-style read barrier.
4496   DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4497 
4498   X86Assembler* assembler = codegen->GetAssembler();
4499   LocationSummary* locations = invoke->GetLocations();
4500   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4501   uint32_t value_index = number_of_arguments - 1;
4502   DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4503   DCHECK_EQ(type, invoke->GetType());
4504   Location value_loc = locations->InAt(value_index);
4505   Register temp = locations->GetTemp(0).AsRegister<Register>();
4506   SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4507   codegen->AddSlowPath(slow_path);
4508 
4509   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4510 
4511   Register offset = locations->GetTemp(1).AsRegister<Register>();
4512   // Get the field referred by the VarHandle. The returned register contains the object reference
4513   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4514   // declaring class will be placed in 'temp' register.
4515   Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4516 
4517   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4518   temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4519   DCHECK_NE(temp, reference);
4520   Address field_addr(reference, offset, TIMES_1, 0);
4521 
4522   switch (type) {
4523     case DataType::Type::kInt8:
4524       __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4525       __ movsxb(locations->Out().AsRegister<Register>(),
4526                 locations->Out().AsRegister<ByteRegister>());
4527       break;
4528     case DataType::Type::kInt16:
4529       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4530       __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4531       break;
4532     case DataType::Type::kUint16:
4533       __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4534       __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4535       break;
4536     case DataType::Type::kInt32:
4537       __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4538       break;
4539     case DataType::Type::kFloat32: {
4540       Location temp_float =
4541           (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4542       DCHECK(temp_float.IsFpuRegister());
4543       Location eax = Location::RegisterLocation(EAX);
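      // There is no floating-point xadd, so use a load + addss + lock cmpxchg retry loop, with
      // the expected old bits kept in EAX as cmpxchg requires.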
      NearLabel try_again;
      __ Bind(&try_again);
      __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
      __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
      __ addss(temp_float.AsFpuRegister<XmmRegister>(),
               value_loc.AsFpuRegister<XmmRegister>());
      GenPrimitiveLockedCmpxchg(type,
                                codegen,
                                /* expected_value= */ eax,
                                /* new_value= */ temp_float,
                                reference,
                                offset,
                                temp);
      __ j(kNotZero, &try_again);

      // The old value is present in EAX.
      codegen->Move32(locations->Out(), eax);
      break;
    }
    default:
      UNREACHABLE();
  }

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  CreateVarHandleGetAndAddLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndAdd(invoke, codegen_);
}

static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // VarHandleGet intrinsic is the Baker-style read barrier.
  if (gUseReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  if (!HasVarHandleIntrinsicImplementation(invoke)) {
    return;
  }

  // The last argument should be the value we intend to set.
  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
  if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
    // We avoid the case of an Int64 value because we would need to place it in a register pair.
    // If the slow path is taken, the ParallelMove might fail to move the pair according to the
    // X86DexCallingConvention in case of an overlap (e.g., move the 64-bit value from
    // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
    return;
  }

  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
  LocationSummary* locations = new (allocator) LocationSummary(
      invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // We need a byte register temp to store the result of the bitwise operation.
  locations->AddTemp(Location::RegisterLocation(EBX));
  locations->AddTemp(Location::RequiresRegister());
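  // A second temp holds the field offset resolved from the VarHandle.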
  locations->SetInAt(0, Location::RequiresRegister());
  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  if (expected_coordinates_count == 1u) {
    // For instance fields, this is the source object.
    locations->SetInAt(1, Location::RequiresRegister());
  } else {
    // For static fields, we need another temp because one will be busy with the declaring class.
    locations->AddTemp(Location::RequiresRegister());
  }

  locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
  locations->SetOut(Location::RegisterLocation(EAX));
}

static void GenerateBitwiseOp(HInvoke* invoke,
                              CodeGeneratorX86* codegen,
                              Register left,
                              Register right) {
  X86Assembler* assembler = codegen->GetAssembler();

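  // The two-operand x86 forms below write the result back into 'left'.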
  switch (invoke->GetIntrinsic()) {
    case Intrinsics::kVarHandleGetAndBitwiseOr:
    case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
      __ orl(left, right);
      break;
    case Intrinsics::kVarHandleGetAndBitwiseXor:
    case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
      __ xorl(left, right);
      break;
    case Intrinsics::kVarHandleGetAndBitwiseAnd:
    case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
    case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
      __ andl(left, right);
      break;
    default:
      UNREACHABLE();
  }
}

static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
  // The only read barrier implementation supporting the
  // VarHandleGet intrinsic is the Baker-style read barrier.
  DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);

  X86Assembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
  DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
  DCHECK_EQ(type, invoke->GetType());
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
  codegen->AddSlowPath(slow_path);

  GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);

  Register offset = locations->GetTemp(1).AsRegister<Register>();
  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
  // For static fields, we need another temporary because the first one contains the declaring
  // class.
  Register reference =
      (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
  // Get the field referred to by the VarHandle. The returned register contains the object
  // reference or the declaring class. The field offset will be placed in 'offset'. For static
  // fields, the declaring class will be placed in the 'reference' register.
  reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
  DCHECK_NE(temp, reference);
  Address field_addr(reference, offset, TIMES_1, 0);

  Register out = locations->Out().AsRegister<Register>();
  DCHECK_EQ(out, EAX);

  if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
    codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
  }

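  // The update is implemented as a locked cmpxchg retry loop: EAX (also the output) holds
  // the value loaded from the field and serves as the expected value, while the new value
  // (old value combined with the operand) is computed in the byte-register temp.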
  NearLabel try_again;
  __ Bind(&try_again);
  // Place the expected value in EAX for cmpxchg.
  codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
  codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
  GenerateBitwiseOp(invoke, codegen, temp, out);
  GenPrimitiveLockedCmpxchg(type,
                            codegen,
                            /* expected_value= */ locations->Out(),
                            /* new_value= */ locations->GetTemp(0),
                            reference,
                            offset);
  // If the cmpxchg failed, another thread changed the value, so try again.
  __ j(kNotZero, &try_again);

  // The old value is present in EAX.

  if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
    codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
  }

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  CreateVarHandleGetAndBitwiseOpLocations(invoke);
}

void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
  GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
}

static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  X86Assembler* assembler = codegen->GetAssembler();
  XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
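  // The 213 form computes left = left * right + accumulator in place, which is why the
  // first input is constrained to share a register with the output.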
  if (invoke->GetType() == DataType::Type::kFloat32) {
    __ vfmadd213ss(left, right, accumulator);
  } else {
    DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
    __ vfmadd213sd(left, right, accumulator);
  }
}

void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

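// The FMA intrinsics are only intrinsified when the target reports AVX2 support; otherwise
// no locations are created here and the invoke falls back to a regular call.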
void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
  GenerateMathFma(invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
  if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
    CreateFPFPFPToFPCallLocations(allocator_, invoke);
  }
}

#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
#undef MARK_UNIMPLEMENTED

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art