1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_x86.h"
30 #include "linker/linker_patch.h"
31 #include "lock_word.h"
32 #include "mirror/array-inl.h"
33 #include "mirror/class-inl.h"
34 #include "thread.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37 #include "utils/x86/assembler_x86.h"
38 #include "utils/x86/managed_register_x86.h"
39
40 namespace art {
41
42 template<class MirrorType>
43 class GcRoot;
44
45 namespace x86 {
46
47 static constexpr int kCurrentMethodStackOffset = 0;
48 static constexpr Register kMethodRegisterArgument = EAX;
49 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
50
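// 0x400 is bit 10 (the C2 condition flag) of the x87 FPU status word; fprem leaves C2 set
// while the partial remainder is still incomplete, so FP remainder code can poll this bit.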
51 static constexpr int kC2ConditionMask = 0x400;
52
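// Pseudo-register standing in for the return address slot pushed by the call instruction;
// it is registered as allocated so the frame layout accounts for it (mimicking Quick).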
53 static constexpr int kFakeReturnRegister = Register(8);
54
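// Canonical quiet NaN bit patterns for double and float.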
55 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
56 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
57
58 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
59 InvokeRuntimeCallingConvention calling_convention;
60 RegisterSet caller_saves = RegisterSet::Empty();
61 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
62 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
63 // that the kPrimNot result register is the same as the first argument register.
64 return caller_saves;
65 }
66
67 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
68 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
69 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
70
71 class NullCheckSlowPathX86 : public SlowPathCode {
72 public:
73 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
74
75 void EmitNativeCode(CodeGenerator* codegen) override {
76 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
77 __ Bind(GetEntryLabel());
78 if (instruction_->CanThrowIntoCatchBlock()) {
79 // Live registers will be restored in the catch block if caught.
80 SaveLiveRegisters(codegen, instruction_->GetLocations());
81 }
82 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
83 instruction_,
84 instruction_->GetDexPc(),
85 this);
86 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
87 }
88
89 bool IsFatal() const override { return true; }
90
91 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
92
93 private:
94 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
95 };
96
97 class DivZeroCheckSlowPathX86 : public SlowPathCode {
98 public:
99 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
100
101 void EmitNativeCode(CodeGenerator* codegen) override {
102 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
103 __ Bind(GetEntryLabel());
104 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
105 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
106 }
107
108 bool IsFatal() const override { return true; }
109
110 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
111
112 private:
113 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
114 };
115
116 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
117 public:
118 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
119 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
120
121 void EmitNativeCode(CodeGenerator* codegen) override {
122 __ Bind(GetEntryLabel());
123 if (is_div_) {
124 __ negl(reg_);
125 } else {
126 __ movl(reg_, Immediate(0));
127 }
128 __ jmp(GetExitLabel());
129 }
130
131 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
132
133 private:
134 Register reg_;
135 bool is_div_;
136 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
137 };
138
139 class BoundsCheckSlowPathX86 : public SlowPathCode {
140 public:
141 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
142
143 void EmitNativeCode(CodeGenerator* codegen) override {
144 LocationSummary* locations = instruction_->GetLocations();
145 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
146 __ Bind(GetEntryLabel());
147 // We're moving two locations to locations that could overlap, so we need a parallel
148 // move resolver.
149 if (instruction_->CanThrowIntoCatchBlock()) {
150 // Live registers will be restored in the catch block if caught.
151 SaveLiveRegisters(codegen, instruction_->GetLocations());
152 }
153
154 // Are we using an array length from memory?
155 HInstruction* array_length = instruction_->InputAt(1);
156 Location length_loc = locations->InAt(1);
157 InvokeRuntimeCallingConvention calling_convention;
158 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
159 // Load the array length into our temporary.
160 HArrayLength* length = array_length->AsArrayLength();
161 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
162 Location array_loc = array_length->GetLocations()->InAt(0);
163 Address array_len(array_loc.AsRegister<Register>(), len_offset);
164 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
165 // Check for conflicts with index.
166 if (length_loc.Equals(locations->InAt(0))) {
167 // We know we aren't using parameter 2.
168 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
169 }
170 __ movl(length_loc.AsRegister<Register>(), array_len);
171 if (mirror::kUseStringCompression && length->IsStringLength()) {
172 __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
173 }
174 }
175 x86_codegen->EmitParallelMoves(
176 locations->InAt(0),
177 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
178 DataType::Type::kInt32,
179 length_loc,
180 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
181 DataType::Type::kInt32);
182 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
183 ? kQuickThrowStringBounds
184 : kQuickThrowArrayBounds;
185 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
186 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
187 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
188 }
189
190 bool IsFatal() const override { return true; }
191
192 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
193
194 private:
195 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
196 };
197
198 class SuspendCheckSlowPathX86 : public SlowPathCode {
199 public:
200 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
201 : SlowPathCode(instruction), successor_(successor) {}
202
203 void EmitNativeCode(CodeGenerator* codegen) override {
204 LocationSummary* locations = instruction_->GetLocations();
205 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
206 __ Bind(GetEntryLabel());
207 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
208 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
209 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
210 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
211 if (successor_ == nullptr) {
212 __ jmp(GetReturnLabel());
213 } else {
214 __ jmp(x86_codegen->GetLabelOf(successor_));
215 }
216 }
217
218 Label* GetReturnLabel() {
219 DCHECK(successor_ == nullptr);
220 return &return_label_;
221 }
222
223 HBasicBlock* GetSuccessor() const {
224 return successor_;
225 }
226
227 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
228
229 private:
230 HBasicBlock* const successor_;
231 Label return_label_;
232
233 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
234 };
235
236 class LoadStringSlowPathX86 : public SlowPathCode {
237 public:
238 explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}
239
240 void EmitNativeCode(CodeGenerator* codegen) override {
241 LocationSummary* locations = instruction_->GetLocations();
242 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
243
244 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
245 __ Bind(GetEntryLabel());
246 SaveLiveRegisters(codegen, locations);
247
248 InvokeRuntimeCallingConvention calling_convention;
249 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
250 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
251 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
252 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
253 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
254 RestoreLiveRegisters(codegen, locations);
255
256 __ jmp(GetExitLabel());
257 }
258
259 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
260
261 private:
262 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
263 };
264
265 class LoadClassSlowPathX86 : public SlowPathCode {
266 public:
267 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
268 : SlowPathCode(at), cls_(cls) {
269 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
270 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
271 }
272
273 void EmitNativeCode(CodeGenerator* codegen) override {
274 LocationSummary* locations = instruction_->GetLocations();
275 Location out = locations->Out();
276 const uint32_t dex_pc = instruction_->GetDexPc();
277 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
278 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
279
280 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
281 __ Bind(GetEntryLabel());
282 SaveLiveRegisters(codegen, locations);
283
284 InvokeRuntimeCallingConvention calling_convention;
285 if (must_resolve_type) {
286 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
287 dex::TypeIndex type_index = cls_->GetTypeIndex();
288 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
289 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
290 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
291 // If we also must_do_clinit, the resolved type is now in the correct register.
292 } else {
293 DCHECK(must_do_clinit);
294 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
295 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
296 }
297 if (must_do_clinit) {
298 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
299 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
300 }
301
302 // Move the class to the desired location.
303 if (out.IsValid()) {
304 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
305 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
306 }
307 RestoreLiveRegisters(codegen, locations);
308 __ jmp(GetExitLabel());
309 }
310
311 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
312
313 private:
314 // The class this slow path will load.
315 HLoadClass* const cls_;
316
317 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
318 };
319
320 class TypeCheckSlowPathX86 : public SlowPathCode {
321 public:
322 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
323 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
324
325 void EmitNativeCode(CodeGenerator* codegen) override {
326 LocationSummary* locations = instruction_->GetLocations();
327 DCHECK(instruction_->IsCheckCast()
328 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
329
330 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
331 __ Bind(GetEntryLabel());
332
333 if (kPoisonHeapReferences &&
334 instruction_->IsCheckCast() &&
335 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
336 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
337 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
338 }
339
340 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
341 SaveLiveRegisters(codegen, locations);
342 }
343
344 // We're moving two locations to locations that could overlap, so we need a parallel
345 // move resolver.
346 InvokeRuntimeCallingConvention calling_convention;
347 x86_codegen->EmitParallelMoves(locations->InAt(0),
348 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
349 DataType::Type::kReference,
350 locations->InAt(1),
351 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
352 DataType::Type::kReference);
353 if (instruction_->IsInstanceOf()) {
354 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
355 instruction_,
356 instruction_->GetDexPc(),
357 this);
358 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
359 } else {
360 DCHECK(instruction_->IsCheckCast());
361 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
362 instruction_,
363 instruction_->GetDexPc(),
364 this);
365 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
366 }
367
368 if (!is_fatal_) {
369 if (instruction_->IsInstanceOf()) {
370 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
371 }
372 RestoreLiveRegisters(codegen, locations);
373
374 __ jmp(GetExitLabel());
375 }
376 }
377
378 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
379 bool IsFatal() const override { return is_fatal_; }
380
381 private:
382 const bool is_fatal_;
383
384 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
385 };
386
387 class DeoptimizationSlowPathX86 : public SlowPathCode {
388 public:
389 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
390 : SlowPathCode(instruction) {}
391
392 void EmitNativeCode(CodeGenerator* codegen) override {
393 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
394 __ Bind(GetEntryLabel());
395 LocationSummary* locations = instruction_->GetLocations();
396 SaveLiveRegisters(codegen, locations);
397 InvokeRuntimeCallingConvention calling_convention;
398 x86_codegen->Load32BitValue(
399 calling_convention.GetRegisterAt(0),
400 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
401 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
402 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
403 }
404
405 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
406
407 private:
408 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
409 };
410
411 class ArraySetSlowPathX86 : public SlowPathCode {
412 public:
413 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
414
415 void EmitNativeCode(CodeGenerator* codegen) override {
416 LocationSummary* locations = instruction_->GetLocations();
417 __ Bind(GetEntryLabel());
418 SaveLiveRegisters(codegen, locations);
419
420 InvokeRuntimeCallingConvention calling_convention;
421 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
422 parallel_move.AddMove(
423 locations->InAt(0),
424 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
425 DataType::Type::kReference,
426 nullptr);
427 parallel_move.AddMove(
428 locations->InAt(1),
429 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
430 DataType::Type::kInt32,
431 nullptr);
432 parallel_move.AddMove(
433 locations->InAt(2),
434 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
435 DataType::Type::kReference,
436 nullptr);
437 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
438
439 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
440 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
441 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
442 RestoreLiveRegisters(codegen, locations);
443 __ jmp(GetExitLabel());
444 }
445
446 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
447
448 private:
449 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
450 };
451
452 // Slow path marking an object reference `ref` during a read
453 // barrier. The field `obj.field` in the object `obj` holding this
454 // reference does not get updated by this slow path after marking (see
455 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
456 //
457 // This means that after the execution of this slow path, `ref` will
458 // always be up-to-date, but `obj.field` may not; i.e., after the
459 // flip, `ref` will be a to-space reference, but `obj.field` will
460 // probably still be a from-space reference (unless it gets updated by
461 // another thread, or if another thread installed another object
462 // reference (different from `ref`) in `obj.field`).
463 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
464 public:
465 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
466 Location ref,
467 bool unpoison_ref_before_marking)
468 : SlowPathCode(instruction),
469 ref_(ref),
470 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
471 DCHECK(kEmitCompilerReadBarrier);
472 }
473
474 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
475
476 void EmitNativeCode(CodeGenerator* codegen) override {
477 LocationSummary* locations = instruction_->GetLocations();
478 Register ref_reg = ref_.AsRegister<Register>();
479 DCHECK(locations->CanCall());
480 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
481 DCHECK(instruction_->IsInstanceFieldGet() ||
482 instruction_->IsStaticFieldGet() ||
483 instruction_->IsArrayGet() ||
484 instruction_->IsArraySet() ||
485 instruction_->IsLoadClass() ||
486 instruction_->IsLoadString() ||
487 instruction_->IsInstanceOf() ||
488 instruction_->IsCheckCast() ||
489 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
490 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
491 << "Unexpected instruction in read barrier marking slow path: "
492 << instruction_->DebugName();
493
494 __ Bind(GetEntryLabel());
495 if (unpoison_ref_before_marking_) {
496 // Object* ref = ref_addr->AsMirrorPtr()
497 __ MaybeUnpoisonHeapReference(ref_reg);
498 }
499 // No need to save live registers; it's taken care of by the
500 // entrypoint. Also, there is no need to update the stack mask,
501 // as this runtime call will not trigger a garbage collection.
502 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
503 DCHECK_NE(ref_reg, ESP);
504 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
505 // "Compact" slow path, saving two moves.
506 //
507 // Instead of using the standard runtime calling convention (input
508 // and output in EAX):
509 //
510 // EAX <- ref
511 // EAX <- ReadBarrierMark(EAX)
512 // ref <- EAX
513 //
514 // we just use rX (the register containing `ref`) as input and output
515 // of a dedicated entrypoint:
516 //
517 // rX <- ReadBarrierMarkRegX(rX)
518 //
519 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
520 // This runtime call does not require a stack map.
521 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
522 __ jmp(GetExitLabel());
523 }
524
525 private:
526 // The location (register) of the marked object reference.
527 const Location ref_;
528 // Should the reference in `ref_` be unpoisoned prior to marking it?
529 const bool unpoison_ref_before_marking_;
530
531 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
532 };
533
534 // Slow path marking an object reference `ref` during a read barrier,
535 // and if needed, atomically updating the field `obj.field` in the
536 // object `obj` holding this reference after marking (contrary to
537 // ReadBarrierMarkSlowPathX86 above, which never tries to update
538 // `obj.field`).
539 //
540 // This means that after the execution of this slow path, both `ref`
541 // and `obj.field` will be up-to-date; i.e., after the flip, both will
542 // hold the same to-space reference (unless another thread installed
543 // another object reference (different from `ref`) in `obj.field`).
544 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
545 public:
546 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
547 Location ref,
548 Register obj,
549 const Address& field_addr,
550 bool unpoison_ref_before_marking,
551 Register temp)
552 : SlowPathCode(instruction),
553 ref_(ref),
554 obj_(obj),
555 field_addr_(field_addr),
556 unpoison_ref_before_marking_(unpoison_ref_before_marking),
557 temp_(temp) {
558 DCHECK(kEmitCompilerReadBarrier);
559 }
560
561 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
562
563 void EmitNativeCode(CodeGenerator* codegen) override {
564 LocationSummary* locations = instruction_->GetLocations();
565 Register ref_reg = ref_.AsRegister<Register>();
566 DCHECK(locations->CanCall());
567 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
568 // This slow path is only used by the UnsafeCASObject intrinsic.
569 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
570 << "Unexpected instruction in read barrier marking and field updating slow path: "
571 << instruction_->DebugName();
572 DCHECK(instruction_->GetLocations()->Intrinsified());
573 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
574
575 __ Bind(GetEntryLabel());
576 if (unpoison_ref_before_marking_) {
577 // Object* ref = ref_addr->AsMirrorPtr()
578 __ MaybeUnpoisonHeapReference(ref_reg);
579 }
580
581 // Save the old (unpoisoned) reference.
582 __ movl(temp_, ref_reg);
583
584 // No need to save live registers; it's taken care of by the
585 // entrypoint. Also, there is no need to update the stack mask,
586 // as this runtime call will not trigger a garbage collection.
587 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
588 DCHECK_NE(ref_reg, ESP);
589 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
590 // "Compact" slow path, saving two moves.
591 //
592 // Instead of using the standard runtime calling convention (input
593 // and output in EAX):
594 //
595 // EAX <- ref
596 // EAX <- ReadBarrierMark(EAX)
597 // ref <- EAX
598 //
599 // we just use rX (the register containing `ref`) as input and output
600 // of a dedicated entrypoint:
601 //
602 // rX <- ReadBarrierMarkRegX(rX)
603 //
604 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
605 // This runtime call does not require a stack map.
606 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
607
608 // If the new reference is different from the old reference,
609 // update the field in the holder (`*field_addr`).
610 //
611 // Note that this field could also hold a different object, if
612 // another thread had concurrently changed it. In that case, the
613 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
614 // operation below would abort the CAS, leaving the field as-is.
615 NearLabel done;
616 __ cmpl(temp_, ref_reg);
617 __ j(kEqual, &done);
618
619 // Update the holder's field atomically. This may fail if the
620 // mutator updates it before us, but that's OK. This is achieved
621 // using a strong compare-and-set (CAS) operation with relaxed
622 // memory synchronization ordering, where the expected value is
623 // the old reference and the desired value is the new reference.
624 // This operation is implemented with a 32-bit LOCK CMPXCHG
625 // instruction, which requires the expected value (the old
626 // reference) to be in EAX. Save EAX beforehand, and move the
627 // expected value (stored in `temp_`) into EAX.
628 __ pushl(EAX);
629 __ movl(EAX, temp_);
630
631 // Convenience aliases.
632 Register base = obj_;
633 Register expected = EAX;
634 Register value = ref_reg;
635
636 bool base_equals_value = (base == value);
637 if (kPoisonHeapReferences) {
638 if (base_equals_value) {
639 // If `base` and `value` are the same register location, move
640 // `value` to a temporary register. This way, poisoning
641 // `value` won't invalidate `base`.
642 value = temp_;
643 __ movl(value, base);
644 }
645
646 // Check that the register allocator did not assign the location
647 // of `expected` (EAX) to `value` nor to `base`, so that heap
648 // poisoning (when enabled) works as intended below.
649 // - If `value` were equal to `expected`, both references would
650 // be poisoned twice, meaning they would not be poisoned at
651 // all, as heap poisoning uses address negation.
652 // - If `base` were equal to `expected`, poisoning `expected`
653 // would invalidate `base`.
654 DCHECK_NE(value, expected);
655 DCHECK_NE(base, expected);
656
657 __ PoisonHeapReference(expected);
658 __ PoisonHeapReference(value);
659 }
660
661 __ LockCmpxchgl(field_addr_, value);
662
663 // If heap poisoning is enabled, we need to unpoison the values
664 // that were poisoned earlier.
665 if (kPoisonHeapReferences) {
666 if (base_equals_value) {
667 // `value` has been moved to a temporary register, no need
668 // to unpoison it.
669 } else {
670 __ UnpoisonHeapReference(value);
671 }
672 // No need to unpoison `expected` (EAX), as it is overwritten below.
673 }
674
675 // Restore EAX.
676 __ popl(EAX);
677
678 __ Bind(&done);
679 __ jmp(GetExitLabel());
680 }
681
682 private:
683 // The location (register) of the marked object reference.
684 const Location ref_;
685 // The register containing the object holding the marked object reference field.
686 const Register obj_;
687 // The address of the marked reference field. The base of this address must be `obj_`.
688 const Address field_addr_;
689
690 // Should the reference in `ref_` be unpoisoned prior to marking it?
691 const bool unpoison_ref_before_marking_;
692
693 const Register temp_;
694
695 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
696 };
697
698 // Slow path generating a read barrier for a heap reference.
699 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
700 public:
701 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
702 Location out,
703 Location ref,
704 Location obj,
705 uint32_t offset,
706 Location index)
707 : SlowPathCode(instruction),
708 out_(out),
709 ref_(ref),
710 obj_(obj),
711 offset_(offset),
712 index_(index) {
713 DCHECK(kEmitCompilerReadBarrier);
714 // If `obj` is equal to `out` or `ref`, it means the initial object
715 // has been overwritten by (or after) the heap object reference load
716 // to be instrumented, e.g.:
717 //
718 // __ movl(out, Address(out, offset));
719 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
720 //
721 // In that case, we have lost the information about the original
722 // object, and the emitted read barrier cannot work properly.
723 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
724 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
725 }
726
727 void EmitNativeCode(CodeGenerator* codegen) override {
728 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
729 LocationSummary* locations = instruction_->GetLocations();
730 Register reg_out = out_.AsRegister<Register>();
731 DCHECK(locations->CanCall());
732 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
733 DCHECK(instruction_->IsInstanceFieldGet() ||
734 instruction_->IsStaticFieldGet() ||
735 instruction_->IsArrayGet() ||
736 instruction_->IsInstanceOf() ||
737 instruction_->IsCheckCast() ||
738 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
739 << "Unexpected instruction in read barrier for heap reference slow path: "
740 << instruction_->DebugName();
741
742 __ Bind(GetEntryLabel());
743 SaveLiveRegisters(codegen, locations);
744
745 // We may have to change the index's value, but as `index_` is a
746 // constant member (like other "inputs" of this slow path),
747 // introduce a copy of it, `index`.
748 Location index = index_;
749 if (index_.IsValid()) {
750 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
751 if (instruction_->IsArrayGet()) {
752 // Compute the actual memory offset and store it in `index`.
753 Register index_reg = index_.AsRegister<Register>();
754 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
755 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
756 // We are about to change the value of `index_reg` (see the
757 // calls to art::x86::X86Assembler::shll and
758 // art::x86::X86Assembler::AddImmediate below), but it has
759 // not been saved by the previous call to
760 // art::SlowPathCode::SaveLiveRegisters, as it is a
761 // callee-save register --
762 // art::SlowPathCode::SaveLiveRegisters does not consider
763 // callee-save registers, as it has been designed with the
764 // assumption that callee-save registers are supposed to be
765 // handled by the called function. So, as a callee-save
766 // register, `index_reg` _would_ eventually be saved onto
767 // the stack, but it would be too late: we would have
768 // changed its value earlier. Therefore, we manually save
769 // it here into another freely available register,
770 // `free_reg`, chosen of course among the caller-save
771 // registers (as a callee-save `free_reg` register would
772 // exhibit the same problem).
773 //
774 // Note we could have requested a temporary register from
775 // the register allocator instead; but we prefer not to, as
776 // this is a slow path, and we know we can find a
777 // caller-save register that is available.
778 Register free_reg = FindAvailableCallerSaveRegister(codegen);
779 __ movl(free_reg, index_reg);
780 index_reg = free_reg;
781 index = Location::RegisterLocation(index_reg);
782 } else {
783 // The initial register stored in `index_` has already been
784 // saved in the call to art::SlowPathCode::SaveLiveRegisters
785 // (as it is not a callee-save register), so we can freely
786 // use it.
787 }
788 // Shifting the index value contained in `index_reg` by the scale
789 // factor (2) cannot overflow in practice, as the runtime is
790 // unable to allocate object arrays with a size larger than
791 // 2^26 - 1 (that is, 2^28 - 4 bytes).
792 __ shll(index_reg, Immediate(TIMES_4));
793 static_assert(
794 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
795 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
796 __ AddImmediate(index_reg, Immediate(offset_));
797 } else {
798 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
799 // intrinsics, `index_` is not shifted by a scale factor of 2
800 // (as in the case of ArrayGet), as it is actually an offset
801 // to an object field within an object.
802 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
803 DCHECK(instruction_->GetLocations()->Intrinsified());
804 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
805 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
806 << instruction_->AsInvoke()->GetIntrinsic();
807 DCHECK_EQ(offset_, 0U);
808 DCHECK(index_.IsRegisterPair());
809 // UnsafeGet's offset location is a register pair, the low
810 // part contains the correct offset.
811 index = index_.ToLow();
812 }
813 }
814
815 // We're moving two or three locations to locations that could
816 // overlap, so we need a parallel move resolver.
817 InvokeRuntimeCallingConvention calling_convention;
818 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
819 parallel_move.AddMove(ref_,
820 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
821 DataType::Type::kReference,
822 nullptr);
823 parallel_move.AddMove(obj_,
824 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
825 DataType::Type::kReference,
826 nullptr);
827 if (index.IsValid()) {
828 parallel_move.AddMove(index,
829 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
830 DataType::Type::kInt32,
831 nullptr);
832 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
833 } else {
834 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
835 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
836 }
837 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
838 CheckEntrypointTypes<
839 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
840 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
841
842 RestoreLiveRegisters(codegen, locations);
843 __ jmp(GetExitLabel());
844 }
845
846 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
847
848 private:
849 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
850 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
851 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
852 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
853 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
854 return static_cast<Register>(i);
855 }
856 }
857 // We shall never fail to find a free caller-save register, as
858 // there are more than two core caller-save registers on x86
859 // (meaning it is possible to find one which is different from
860 // `ref` and `obj`).
861 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
862 LOG(FATAL) << "Could not find a free caller-save register";
863 UNREACHABLE();
864 }
865
866 const Location out_;
867 const Location ref_;
868 const Location obj_;
869 const uint32_t offset_;
870 // An additional location containing an index to an array.
871 // Only used for HArrayGet and the UnsafeGetObject &
872 // UnsafeGetObjectVolatile intrinsics.
873 const Location index_;
874
875 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
876 };
877
878 // Slow path generating a read barrier for a GC root.
879 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
880 public:
881 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
882 : SlowPathCode(instruction), out_(out), root_(root) {
883 DCHECK(kEmitCompilerReadBarrier);
884 }
885
886 void EmitNativeCode(CodeGenerator* codegen) override {
887 LocationSummary* locations = instruction_->GetLocations();
888 Register reg_out = out_.AsRegister<Register>();
889 DCHECK(locations->CanCall());
890 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
891 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
892 << "Unexpected instruction in read barrier for GC root slow path: "
893 << instruction_->DebugName();
894
895 __ Bind(GetEntryLabel());
896 SaveLiveRegisters(codegen, locations);
897
898 InvokeRuntimeCallingConvention calling_convention;
899 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
900 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
901 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
902 instruction_,
903 instruction_->GetDexPc(),
904 this);
905 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
906 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
907
908 RestoreLiveRegisters(codegen, locations);
909 __ jmp(GetExitLabel());
910 }
911
912 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
913
914 private:
915 const Location out_;
916 const Location root_;
917
918 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
919 };
920
921 #undef __
922 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
923 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
924
925 inline Condition X86Condition(IfCondition cond) {
926 switch (cond) {
927 case kCondEQ: return kEqual;
928 case kCondNE: return kNotEqual;
929 case kCondLT: return kLess;
930 case kCondLE: return kLessEqual;
931 case kCondGT: return kGreater;
932 case kCondGE: return kGreaterEqual;
933 case kCondB: return kBelow;
934 case kCondBE: return kBelowEqual;
935 case kCondA: return kAbove;
936 case kCondAE: return kAboveEqual;
937 }
938 LOG(FATAL) << "Unreachable";
939 UNREACHABLE();
940 }
941
942 // Maps signed condition to unsigned condition and FP condition to x86 name.
943 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
944 switch (cond) {
945 case kCondEQ: return kEqual;
946 case kCondNE: return kNotEqual;
947 // Signed to unsigned, and FP to x86 name.
948 case kCondLT: return kBelow;
949 case kCondLE: return kBelowEqual;
950 case kCondGT: return kAbove;
951 case kCondGE: return kAboveEqual;
952 // Unsigned remain unchanged.
953 case kCondB: return kBelow;
954 case kCondBE: return kBelowEqual;
955 case kCondA: return kAbove;
956 case kCondAE: return kAboveEqual;
957 }
958 LOG(FATAL) << "Unreachable";
959 UNREACHABLE();
960 }
961
962 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
963 stream << Register(reg);
964 }
965
966 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
967 stream << XmmRegister(reg);
968 }
969
970 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
971 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
972 }
973
974 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
975 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
976 return kX86WordSize;
977 }
978
979 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
980 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
981 return kX86WordSize;
982 }
983
984 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
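// Graphs that use SIMD spill the full 128-bit XMM register; otherwise only the low
// 64 bits (the double-precision portion) need to be preserved.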
985 if (GetGraph()->HasSIMD()) {
986 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
987 } else {
988 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
989 }
990 return GetFloatingPointSpillSlotSize();
991 }
992
993 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
994 if (GetGraph()->HasSIMD()) {
995 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
996 } else {
997 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
998 }
999 return GetFloatingPointSpillSlotSize();
1000 }
1001
1002 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1003 HInstruction* instruction,
1004 uint32_t dex_pc,
1005 SlowPathCode* slow_path) {
1006 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1007 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1008 if (EntrypointRequiresStackMap(entrypoint)) {
1009 RecordPcInfo(instruction, dex_pc, slow_path);
1010 }
1011 }
1012
1013 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1014 HInstruction* instruction,
1015 SlowPathCode* slow_path) {
1016 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1017 GenerateInvokeRuntime(entry_point_offset);
1018 }
1019
1020 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
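// Quick entrypoints are fields of the current Thread, which is reachable through the fs
// segment register on x86, so the call goes through fs:[entry_point_offset].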
1021 __ fs()->call(Address::Absolute(entry_point_offset));
1022 }
1023
1024 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1025 const CompilerOptions& compiler_options,
1026 OptimizingCompilerStats* stats)
1027 : CodeGenerator(graph,
1028 kNumberOfCpuRegisters,
1029 kNumberOfXmmRegisters,
1030 kNumberOfRegisterPairs,
1031 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1032 arraysize(kCoreCalleeSaves))
1033 | (1 << kFakeReturnRegister),
1034 0,
1035 compiler_options,
1036 stats),
1037 block_labels_(nullptr),
1038 location_builder_(graph, this),
1039 instruction_visitor_(graph, this),
1040 move_resolver_(graph->GetAllocator(), this),
1041 assembler_(graph->GetAllocator()),
1042 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1043 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1044 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1045 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1046 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1047 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1048 boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1049 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1050 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1051 constant_area_start_(-1),
1052 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1053 method_address_offset_(std::less<uint32_t>(),
1054 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1055 // Use a fake return address register to mimic Quick.
1056 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1057 }
1058
1059 void CodeGeneratorX86::SetupBlockedRegisters() const {
1060 // Stack register is always reserved.
1061 blocked_core_registers_[ESP] = true;
1062 }
1063
1064 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1065 : InstructionCodeGenerator(graph, codegen),
1066 assembler_(codegen->GetAssembler()),
1067 codegen_(codegen) {}
1068
1069 static dwarf::Reg DWARFReg(Register reg) {
1070 return dwarf::Reg::X86Core(static_cast<int>(reg));
1071 }
1072
1073 void CodeGeneratorX86::GenerateFrameEntry() {
1074 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1075 __ Bind(&frame_entry_label_);
1076 bool skip_overflow_check =
1077 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1078 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1079
1080 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
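// Bump the method's hotness counter on entry; the runtime uses this counter to drive
// (re)compilation decisions.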
1081 __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()),
1082 Immediate(1));
1083 }
1084
1085 if (!skip_overflow_check) {
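// Implicit stack overflow check: probe the address `reserved_bytes` below ESP. If the
// stack cannot grow that far, the access faults and the runtime's fault handler raises
// a StackOverflowError.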
1086 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1087 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1088 RecordPcInfo(nullptr, 0);
1089 }
1090
1091 if (HasEmptyFrame()) {
1092 return;
1093 }
1094
1095 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1096 Register reg = kCoreCalleeSaves[i];
1097 if (allocated_registers_.ContainsCoreRegister(reg)) {
1098 __ pushl(reg);
1099 __ cfi().AdjustCFAOffset(kX86WordSize);
1100 __ cfi().RelOffset(DWARFReg(reg), 0);
1101 }
1102 }
1103
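// Allocate the rest of the frame in a single ESP adjustment; the callee-save pushes above
// are already accounted for by FrameEntrySpillSize().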
1104 int adjust = GetFrameSize() - FrameEntrySpillSize();
1105 __ subl(ESP, Immediate(adjust));
1106 __ cfi().AdjustCFAOffset(adjust);
1107 // Save the current method if we need it. Note that we do not
1108 // do this in HCurrentMethod, as the instruction might have been removed
1109 // in the SSA graph.
1110 if (RequiresCurrentMethod()) {
1111 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1112 }
1113
1114 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1115 // Initialize should_deoptimize flag to 0.
1116 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1117 }
1118 }
1119
1120 void CodeGeneratorX86::GenerateFrameExit() {
1121 __ cfi().RememberState();
1122 if (!HasEmptyFrame()) {
1123 int adjust = GetFrameSize() - FrameEntrySpillSize();
1124 __ addl(ESP, Immediate(adjust));
1125 __ cfi().AdjustCFAOffset(-adjust);
1126
1127 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1128 Register reg = kCoreCalleeSaves[i];
1129 if (allocated_registers_.ContainsCoreRegister(reg)) {
1130 __ popl(reg);
1131 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1132 __ cfi().Restore(DWARFReg(reg));
1133 }
1134 }
1135 }
1136 __ ret();
1137 __ cfi().RestoreState();
1138 __ cfi().DefCFAOffset(GetFrameSize());
1139 }
1140
1141 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1142 __ Bind(GetLabelOf(block));
1143 }
1144
1145 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1146 switch (type) {
1147 case DataType::Type::kReference:
1148 case DataType::Type::kBool:
1149 case DataType::Type::kUint8:
1150 case DataType::Type::kInt8:
1151 case DataType::Type::kUint16:
1152 case DataType::Type::kInt16:
1153 case DataType::Type::kUint32:
1154 case DataType::Type::kInt32:
1155 return Location::RegisterLocation(EAX);
1156
1157 case DataType::Type::kUint64:
1158 case DataType::Type::kInt64:
1159 return Location::RegisterPairLocation(EAX, EDX);
1160
1161 case DataType::Type::kVoid:
1162 return Location::NoLocation();
1163
1164 case DataType::Type::kFloat64:
1165 case DataType::Type::kFloat32:
1166 return Location::FpuRegisterLocation(XMM0);
1167 }
1168
1169 UNREACHABLE();
1170 }
1171
1172 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1173 return Location::RegisterLocation(kMethodRegisterArgument);
1174 }
1175
1176 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1177 switch (type) {
1178 case DataType::Type::kReference:
1179 case DataType::Type::kBool:
1180 case DataType::Type::kUint8:
1181 case DataType::Type::kInt8:
1182 case DataType::Type::kUint16:
1183 case DataType::Type::kInt16:
1184 case DataType::Type::kInt32: {
1185 uint32_t index = gp_index_++;
1186 stack_index_++;
1187 if (index < calling_convention.GetNumberOfRegisters()) {
1188 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1189 } else {
1190 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1191 }
1192 }
1193
1194 case DataType::Type::kInt64: {
1195 uint32_t index = gp_index_;
1196 gp_index_ += 2;
1197 stack_index_ += 2;
1198 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1199 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1200 calling_convention.GetRegisterPairAt(index));
1201 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1202 } else {
1203 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1204 }
1205 }
1206
1207 case DataType::Type::kFloat32: {
1208 uint32_t index = float_index_++;
1209 stack_index_++;
1210 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1211 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1212 } else {
1213 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1214 }
1215 }
1216
1217 case DataType::Type::kFloat64: {
1218 uint32_t index = float_index_++;
1219 stack_index_ += 2;
1220 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1221 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1222 } else {
1223 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1224 }
1225 }
1226
1227 case DataType::Type::kUint32:
1228 case DataType::Type::kUint64:
1229 case DataType::Type::kVoid:
1230 LOG(FATAL) << "Unexpected parameter type " << type;
1231 UNREACHABLE();
1232 }
1233 return Location::NoLocation();
1234 }
1235
1236 void CodeGeneratorX86::Move32(Location destination, Location source) {
1237 if (source.Equals(destination)) {
1238 return;
1239 }
1240 if (destination.IsRegister()) {
1241 if (source.IsRegister()) {
1242 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1243 } else if (source.IsFpuRegister()) {
1244 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1245 } else {
1246 DCHECK(source.IsStackSlot());
1247 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1248 }
1249 } else if (destination.IsFpuRegister()) {
1250 if (source.IsRegister()) {
1251 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1252 } else if (source.IsFpuRegister()) {
1253 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1254 } else {
1255 DCHECK(source.IsStackSlot());
1256 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1257 }
1258 } else {
1259 DCHECK(destination.IsStackSlot()) << destination;
1260 if (source.IsRegister()) {
1261 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1262 } else if (source.IsFpuRegister()) {
1263 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1264 } else if (source.IsConstant()) {
1265 HConstant* constant = source.GetConstant();
1266 int32_t value = GetInt32ValueOf(constant);
1267 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1268 } else {
1269 DCHECK(source.IsStackSlot());
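// Stack-to-stack move without a scratch register: push the source slot and pop it into
// the destination slot.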
1270 __ pushl(Address(ESP, source.GetStackIndex()));
1271 __ popl(Address(ESP, destination.GetStackIndex()));
1272 }
1273 }
1274 }
1275
1276 void CodeGeneratorX86::Move64(Location destination, Location source) {
1277 if (source.Equals(destination)) {
1278 return;
1279 }
1280 if (destination.IsRegisterPair()) {
1281 if (source.IsRegisterPair()) {
1282 EmitParallelMoves(
1283 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1284 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1285 DataType::Type::kInt32,
1286 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1287 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1288 DataType::Type::kInt32);
1289 } else if (source.IsFpuRegister()) {
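// Extract the low 32 bits with movd, then shift the XMM value right to expose the high
// 32 bits. Note that this clobbers the source XMM register.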
1290 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1291 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1292 __ psrlq(src_reg, Immediate(32));
1293 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1294 } else {
1295 // No conflict possible, so just do the moves.
1296 DCHECK(source.IsDoubleStackSlot());
1297 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1298 __ movl(destination.AsRegisterPairHigh<Register>(),
1299 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1300 }
1301 } else if (destination.IsFpuRegister()) {
1302 if (source.IsFpuRegister()) {
1303 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1304 } else if (source.IsDoubleStackSlot()) {
1305 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1306 } else if (source.IsRegisterPair()) {
1307 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1308 // Create stack space for 2 elements.
1309 __ subl(ESP, Immediate(2 * elem_size));
1310 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
1311 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
1312 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1313 // And remove the temporary stack space we allocated.
1314 __ addl(ESP, Immediate(2 * elem_size));
1315 } else {
1316 LOG(FATAL) << "Unimplemented";
1317 }
1318 } else {
1319 DCHECK(destination.IsDoubleStackSlot()) << destination;
1320 if (source.IsRegisterPair()) {
1321 // No conflict possible, so just do the moves.
1322 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1323 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1324 source.AsRegisterPairHigh<Register>());
1325 } else if (source.IsFpuRegister()) {
1326 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1327 } else if (source.IsConstant()) {
1328 HConstant* constant = source.GetConstant();
1329 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1330 int64_t value = GetInt64ValueOf(constant);
1331 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1332 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1333 Immediate(High32Bits(value)));
1334 } else {
1335 DCHECK(source.IsDoubleStackSlot()) << source;
1336 EmitParallelMoves(
1337 Location::StackSlot(source.GetStackIndex()),
1338 Location::StackSlot(destination.GetStackIndex()),
1339 DataType::Type::kInt32,
1340 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1341 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1342 DataType::Type::kInt32);
1343 }
1344 }
1345 }
1346
1347 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1348 DCHECK(location.IsRegister());
1349 __ movl(location.AsRegister<Register>(), Immediate(value));
1350 }
1351
1352 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1353 HParallelMove move(GetGraph()->GetAllocator());
1354 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1355 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1356 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1357 } else {
1358 move.AddMove(src, dst, dst_type, nullptr);
1359 }
1360 GetMoveResolver()->EmitNativeCode(&move);
1361 }
1362
1363 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1364 if (location.IsRegister()) {
1365 locations->AddTemp(location);
1366 } else if (location.IsRegisterPair()) {
1367 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1368 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1369 } else {
1370 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1371 }
1372 }
1373
1374 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1375 if (successor->IsExitBlock()) {
1376 DCHECK(got->GetPrevious()->AlwaysThrows());
1377 return; // no code needed
1378 }
1379
1380 HBasicBlock* block = got->GetBlock();
1381 HInstruction* previous = got->GetPrevious();
1382
1383 HLoopInformation* info = block->GetLoopInformation();
1384 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1385 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
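// EAX is borrowed as a scratch register here: the current ArtMethod* is spilled at the
// bottom of the frame (kCurrentMethodStackOffset), so after the push it is reloaded from
// [ESP + kX86WordSize] and its 16-bit hotness counter is bumped with a word-sized add.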
1386 __ pushl(EAX);
1387 __ movl(EAX, Address(ESP, kX86WordSize));
1388 __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1));
1389 __ popl(EAX);
1390 }
1391 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1392 return;
1393 }
1394
1395 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1396 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1397 }
1398 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1399 __ jmp(codegen_->GetLabelOf(successor));
1400 }
1401 }
1402
1403 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1404 got->SetLocations(nullptr);
1405 }
1406
1407 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1408 HandleGoto(got, got->GetSuccessor());
1409 }
1410
1411 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1412 try_boundary->SetLocations(nullptr);
1413 }
1414
1415 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1416 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1417 if (!successor->IsExitBlock()) {
1418 HandleGoto(try_boundary, successor);
1419 }
1420 }
1421
1422 void LocationsBuilderX86::VisitExit(HExit* exit) {
1423 exit->SetLocations(nullptr);
1424 }
1425
1426 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1427 }
1428
1429 template<class LabelType>
1430 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1431 LabelType* true_label,
1432 LabelType* false_label) {
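// After ucomiss/ucomisd, an unordered result (either operand is NaN) sets ZF, PF and CF,
// so the kUnordered jump must be emitted before the ordinary condition jump below;
// otherwise a NaN operand could spuriously satisfy conditions such as kBelow or kEqual.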
1433 if (cond->IsFPConditionTrueIfNaN()) {
1434 __ j(kUnordered, true_label);
1435 } else if (cond->IsFPConditionFalseIfNaN()) {
1436 __ j(kUnordered, false_label);
1437 }
1438 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1439 }
1440
1441 template<class LabelType>
1442 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1443 LabelType* true_label,
1444 LabelType* false_label) {
1445 LocationSummary* locations = cond->GetLocations();
1446 Location left = locations->InAt(0);
1447 Location right = locations->InAt(1);
1448 IfCondition if_cond = cond->GetCondition();
1449
1450 Register left_high = left.AsRegisterPairHigh<Register>();
1451 Register left_low = left.AsRegisterPairLow<Register>();
1452 IfCondition true_high_cond = if_cond;
1453 IfCondition false_high_cond = cond->GetOppositeCondition();
1454 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1455
1456 // Set the conditions for the test, remembering that == needs to be
1457 // decided using the low words.
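// For example, for a signed `a < b` on int64: jump to the true label if high(a) < high(b),
// to the false label if high(a) > high(b); only when the high words are equal does the
// final, *unsigned* compare of the low words (kBelow) decide the result.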
1458 switch (if_cond) {
1459 case kCondEQ:
1460 case kCondNE:
1461 // Nothing to do.
1462 break;
1463 case kCondLT:
1464 false_high_cond = kCondGT;
1465 break;
1466 case kCondLE:
1467 true_high_cond = kCondLT;
1468 break;
1469 case kCondGT:
1470 false_high_cond = kCondLT;
1471 break;
1472 case kCondGE:
1473 true_high_cond = kCondGT;
1474 break;
1475 case kCondB:
1476 false_high_cond = kCondA;
1477 break;
1478 case kCondBE:
1479 true_high_cond = kCondB;
1480 break;
1481 case kCondA:
1482 false_high_cond = kCondB;
1483 break;
1484 case kCondAE:
1485 true_high_cond = kCondA;
1486 break;
1487 }
1488
1489 if (right.IsConstant()) {
1490 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1491 int32_t val_high = High32Bits(value);
1492 int32_t val_low = Low32Bits(value);
1493
1494 codegen_->Compare32BitValue(left_high, val_high);
1495 if (if_cond == kCondNE) {
1496 __ j(X86Condition(true_high_cond), true_label);
1497 } else if (if_cond == kCondEQ) {
1498 __ j(X86Condition(false_high_cond), false_label);
1499 } else {
1500 __ j(X86Condition(true_high_cond), true_label);
1501 __ j(X86Condition(false_high_cond), false_label);
1502 }
1503 // Must be equal high, so compare the lows.
1504 codegen_->Compare32BitValue(left_low, val_low);
1505 } else if (right.IsRegisterPair()) {
1506 Register right_high = right.AsRegisterPairHigh<Register>();
1507 Register right_low = right.AsRegisterPairLow<Register>();
1508
1509 __ cmpl(left_high, right_high);
1510 if (if_cond == kCondNE) {
1511 __ j(X86Condition(true_high_cond), true_label);
1512 } else if (if_cond == kCondEQ) {
1513 __ j(X86Condition(false_high_cond), false_label);
1514 } else {
1515 __ j(X86Condition(true_high_cond), true_label);
1516 __ j(X86Condition(false_high_cond), false_label);
1517 }
1518 // Must be equal high, so compare the lows.
1519 __ cmpl(left_low, right_low);
1520 } else {
1521 DCHECK(right.IsDoubleStackSlot());
1522 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1523 if (if_cond == kCondNE) {
1524 __ j(X86Condition(true_high_cond), true_label);
1525 } else if (if_cond == kCondEQ) {
1526 __ j(X86Condition(false_high_cond), false_label);
1527 } else {
1528 __ j(X86Condition(true_high_cond), true_label);
1529 __ j(X86Condition(false_high_cond), false_label);
1530 }
1531 // Must be equal high, so compare the lows.
1532 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1533 }
1534 // The last comparison might be unsigned.
1535 __ j(final_condition, true_label);
1536 }
1537
1538 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1539 Location rhs,
1540 HInstruction* insn,
1541 bool is_double) {
1542 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1543 if (is_double) {
1544 if (rhs.IsFpuRegister()) {
1545 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1546 } else if (const_area != nullptr) {
1547 DCHECK(const_area->IsEmittedAtUseSite());
1548 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1549 codegen_->LiteralDoubleAddress(
1550 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1551 const_area->GetBaseMethodAddress(),
1552 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1553 } else {
1554 DCHECK(rhs.IsDoubleStackSlot());
1555 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1556 }
1557 } else {
1558 if (rhs.IsFpuRegister()) {
1559 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1560 } else if (const_area != nullptr) {
1561 DCHECK(const_area->IsEmittedAtUseSite());
1562 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1563 codegen_->LiteralFloatAddress(
1564 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1565 const_area->GetBaseMethodAddress(),
1566 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1567 } else {
1568 DCHECK(rhs.IsStackSlot());
1569 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1570 }
1571 }
1572 }
1573
1574 template<class LabelType>
1575 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1576 LabelType* true_target_in,
1577 LabelType* false_target_in) {
1578 // Generated branching requires both targets to be explicit. If either of the
1579 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1580 LabelType fallthrough_target;
1581 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1582 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1583
1584 LocationSummary* locations = condition->GetLocations();
1585 Location left = locations->InAt(0);
1586 Location right = locations->InAt(1);
1587
1588 DataType::Type type = condition->InputAt(0)->GetType();
1589 switch (type) {
1590 case DataType::Type::kInt64:
1591 GenerateLongComparesAndJumps(condition, true_target, false_target);
1592 break;
1593 case DataType::Type::kFloat32:
1594 GenerateFPCompare(left, right, condition, false);
1595 GenerateFPJumps(condition, true_target, false_target);
1596 break;
1597 case DataType::Type::kFloat64:
1598 GenerateFPCompare(left, right, condition, true);
1599 GenerateFPJumps(condition, true_target, false_target);
1600 break;
1601 default:
1602 LOG(FATAL) << "Unexpected compare type " << type;
1603 }
1604
1605 if (false_target != &fallthrough_target) {
1606 __ jmp(false_target);
1607 }
1608
1609 if (fallthrough_target.IsLinked()) {
1610 __ Bind(&fallthrough_target);
1611 }
1612 }
1613
1614 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1615 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1616 // are set only strictly before `branch`. We can't use the eflags on long/FP
1617 // conditions if they are materialized due to the complex branching.
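// For example, an integer HEqual immediately followed by the HIf that consumes it can
// reuse the flags left by the cmp/test that materialized it; any intervening move could
// clobber them.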
1618 return cond->IsCondition() &&
1619 cond->GetNext() == branch &&
1620 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1621 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1622 }
1623
1624 template<class LabelType>
1625 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1626 size_t condition_input_index,
1627 LabelType* true_target,
1628 LabelType* false_target) {
1629 HInstruction* cond = instruction->InputAt(condition_input_index);
1630
1631 if (true_target == nullptr && false_target == nullptr) {
1632 // Nothing to do. The code always falls through.
1633 return;
1634 } else if (cond->IsIntConstant()) {
1635 // Constant condition, statically compared against "true" (integer value 1).
1636 if (cond->AsIntConstant()->IsTrue()) {
1637 if (true_target != nullptr) {
1638 __ jmp(true_target);
1639 }
1640 } else {
1641 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1642 if (false_target != nullptr) {
1643 __ jmp(false_target);
1644 }
1645 }
1646 return;
1647 }
1648
1649 // The following code generates these patterns:
1650 // (1) true_target == nullptr && false_target != nullptr
1651 // - opposite condition true => branch to false_target
1652 // (2) true_target != nullptr && false_target == nullptr
1653 // - condition true => branch to true_target
1654 // (3) true_target != nullptr && false_target != nullptr
1655 // - condition true => branch to true_target
1656 // - branch to false_target
1657 if (IsBooleanValueOrMaterializedCondition(cond)) {
1658 if (AreEflagsSetFrom(cond, instruction)) {
1659 if (true_target == nullptr) {
1660 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1661 } else {
1662 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1663 }
1664 } else {
1665 // Materialized condition, compare against 0.
1666 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1667 if (lhs.IsRegister()) {
1668 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1669 } else {
1670 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1671 }
1672 if (true_target == nullptr) {
1673 __ j(kEqual, false_target);
1674 } else {
1675 __ j(kNotEqual, true_target);
1676 }
1677 }
1678 } else {
1679 // Condition has not been materialized, use its inputs as the comparison and
1680 // its condition as the branch condition.
1681 HCondition* condition = cond->AsCondition();
1682
1683 // If this is a long or FP comparison that has been folded into
1684 // the HCondition, generate the comparison directly.
1685 DataType::Type type = condition->InputAt(0)->GetType();
1686 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1687 GenerateCompareTestAndBranch(condition, true_target, false_target);
1688 return;
1689 }
1690
1691 Location lhs = condition->GetLocations()->InAt(0);
1692 Location rhs = condition->GetLocations()->InAt(1);
1693 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1694 codegen_->GenerateIntCompare(lhs, rhs);
1695 if (true_target == nullptr) {
1696 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1697 } else {
1698 __ j(X86Condition(condition->GetCondition()), true_target);
1699 }
1700 }
1701
1702 // If neither branch falls through (case 3), the conditional branch to `true_target`
1703 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1704 if (true_target != nullptr && false_target != nullptr) {
1705 __ jmp(false_target);
1706 }
1707 }
1708
1709 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1710 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1711 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1712 locations->SetInAt(0, Location::Any());
1713 }
1714 }
1715
1716 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1717 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1718 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1719 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1720 nullptr : codegen_->GetLabelOf(true_successor);
1721 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1722 nullptr : codegen_->GetLabelOf(false_successor);
1723 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1724 }
1725
1726 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1727 LocationSummary* locations = new (GetGraph()->GetAllocator())
1728 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1729 InvokeRuntimeCallingConvention calling_convention;
1730 RegisterSet caller_saves = RegisterSet::Empty();
1731 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1732 locations->SetCustomSlowPathCallerSaves(caller_saves);
1733 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1734 locations->SetInAt(0, Location::Any());
1735 }
1736 }
1737
1738 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1739 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1740 GenerateTestAndBranch<Label>(deoptimize,
1741 /* condition_input_index= */ 0,
1742 slow_path->GetEntryLabel(),
1743 /* false_target= */ nullptr);
1744 }
1745
1746 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1747 LocationSummary* locations = new (GetGraph()->GetAllocator())
1748 LocationSummary(flag, LocationSummary::kNoCall);
1749 locations->SetOut(Location::RequiresRegister());
1750 }
1751
1752 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1753 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1754 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1755 }
1756
1757 static bool SelectCanUseCMOV(HSelect* select) {
1758 // There are no conditional move instructions for XMMs.
1759 if (DataType::IsFloatingPointType(select->GetType())) {
1760 return false;
1761 }
1762
1763 // A FP condition doesn't generate the single CC that we need.
1764 // In 32 bit mode, a long condition doesn't generate a single CC either.
1765 HInstruction* condition = select->GetCondition();
1766 if (condition->IsCondition()) {
1767 DataType::Type compare_type = condition->InputAt(0)->GetType();
1768 if (compare_type == DataType::Type::kInt64 ||
1769 DataType::IsFloatingPointType(compare_type)) {
1770 return false;
1771 }
1772 }
1773
1774 // We can generate a CMOV for this Select.
1775 return true;
1776 }
1777
1778 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1779 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1780 if (DataType::IsFloatingPointType(select->GetType())) {
1781 locations->SetInAt(0, Location::RequiresFpuRegister());
1782 locations->SetInAt(1, Location::Any());
1783 } else {
1784 locations->SetInAt(0, Location::RequiresRegister());
1785 if (SelectCanUseCMOV(select)) {
1786 if (select->InputAt(1)->IsConstant()) {
1787 // Cmov can't handle a constant value.
1788 locations->SetInAt(1, Location::RequiresRegister());
1789 } else {
1790 locations->SetInAt(1, Location::Any());
1791 }
1792 } else {
1793 locations->SetInAt(1, Location::Any());
1794 }
1795 }
1796 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1797 locations->SetInAt(2, Location::RequiresRegister());
1798 }
1799 locations->SetOut(Location::SameAsFirstInput());
1800 }
1801
1802 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1803 LocationSummary* locations = select->GetLocations();
1804 DCHECK(locations->InAt(0).Equals(locations->Out()));
1805 if (SelectCanUseCMOV(select)) {
1806 // If both the condition and the source types are integer, we can generate
1807 // a CMOV to implement Select.
1808
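// Roughly, for an already-materialized condition the emitted sequence is:
//   testl cond_reg, cond_reg
//   cmovnel out_reg, true_reg   // out_reg already holds the false value (input 0).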
1809 HInstruction* select_condition = select->GetCondition();
1810 Condition cond = kNotEqual;
1811
1812 // Figure out how to test the 'condition'.
1813 if (select_condition->IsCondition()) {
1814 HCondition* condition = select_condition->AsCondition();
1815 if (!condition->IsEmittedAtUseSite()) {
1816 // This was a previously materialized condition.
1817 // Can we use the existing condition code?
1818 if (AreEflagsSetFrom(condition, select)) {
1819 // Materialization was the previous instruction. Condition codes are right.
1820 cond = X86Condition(condition->GetCondition());
1821 } else {
1822 // No, we have to recreate the condition code.
1823 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1824 __ testl(cond_reg, cond_reg);
1825 }
1826 } else {
1827 // We can't handle FP or long here.
1828 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1829 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1830 LocationSummary* cond_locations = condition->GetLocations();
1831 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1832 cond = X86Condition(condition->GetCondition());
1833 }
1834 } else {
1835 // Must be a Boolean condition, which needs to be compared to 0.
1836 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1837 __ testl(cond_reg, cond_reg);
1838 }
1839
1840 // If the condition is true, overwrite the output, which already contains false.
1841 Location false_loc = locations->InAt(0);
1842 Location true_loc = locations->InAt(1);
1843 if (select->GetType() == DataType::Type::kInt64) {
1844 // 64 bit conditional move.
1845 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1846 Register false_low = false_loc.AsRegisterPairLow<Register>();
1847 if (true_loc.IsRegisterPair()) {
1848 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1849 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1850 } else {
1851 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1852 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1853 }
1854 } else {
1855 // 32 bit conditional move.
1856 Register false_reg = false_loc.AsRegister<Register>();
1857 if (true_loc.IsRegister()) {
1858 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1859 } else {
1860 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1861 }
1862 }
1863 } else {
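// No CMOV here (FP-typed select, or a long/FP condition): fall back to a branch that
// skips the move of the true value. The output already holds the false value, since the
// output location is the same as input 0.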
1864 NearLabel false_target;
1865 GenerateTestAndBranch<NearLabel>(
1866 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
1867 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1868 __ Bind(&false_target);
1869 }
1870 }
1871
1872 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1873 new (GetGraph()->GetAllocator()) LocationSummary(info);
1874 }
1875
1876 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1877 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1878 }
1879
1880 void CodeGeneratorX86::GenerateNop() {
1881 __ nop();
1882 }
1883
1884 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1885 LocationSummary* locations =
1886 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1887 // Handle the long/FP comparisons made in instruction simplification.
1888 switch (cond->InputAt(0)->GetType()) {
1889 case DataType::Type::kInt64: {
1890 locations->SetInAt(0, Location::RequiresRegister());
1891 locations->SetInAt(1, Location::Any());
1892 if (!cond->IsEmittedAtUseSite()) {
1893 locations->SetOut(Location::RequiresRegister());
1894 }
1895 break;
1896 }
1897 case DataType::Type::kFloat32:
1898 case DataType::Type::kFloat64: {
1899 locations->SetInAt(0, Location::RequiresFpuRegister());
1900 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1901 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1902 } else if (cond->InputAt(1)->IsConstant()) {
1903 locations->SetInAt(1, Location::RequiresFpuRegister());
1904 } else {
1905 locations->SetInAt(1, Location::Any());
1906 }
1907 if (!cond->IsEmittedAtUseSite()) {
1908 locations->SetOut(Location::RequiresRegister());
1909 }
1910 break;
1911 }
1912 default:
1913 locations->SetInAt(0, Location::RequiresRegister());
1914 locations->SetInAt(1, Location::Any());
1915 if (!cond->IsEmittedAtUseSite()) {
1916 // We need a byte register.
1917 locations->SetOut(Location::RegisterLocation(ECX));
1918 }
1919 break;
1920 }
1921 }
1922
1923 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1924 if (cond->IsEmittedAtUseSite()) {
1925 return;
1926 }
1927
1928 LocationSummary* locations = cond->GetLocations();
1929 Location lhs = locations->InAt(0);
1930 Location rhs = locations->InAt(1);
1931 Register reg = locations->Out().AsRegister<Register>();
1932 NearLabel true_label, false_label;
1933
1934 switch (cond->InputAt(0)->GetType()) {
1935 default: {
1936 // Integer case.
1937
1938 // Clear output register: setb only sets the low byte.
1939 __ xorl(reg, reg);
1940 codegen_->GenerateIntCompare(lhs, rhs);
1941 __ setb(X86Condition(cond->GetCondition()), reg);
1942 return;
1943 }
1944 case DataType::Type::kInt64:
1945 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
1946 break;
1947 case DataType::Type::kFloat32:
1948 GenerateFPCompare(lhs, rhs, cond, false);
1949 GenerateFPJumps(cond, &true_label, &false_label);
1950 break;
1951 case DataType::Type::kFloat64:
1952 GenerateFPCompare(lhs, rhs, cond, true);
1953 GenerateFPJumps(cond, &true_label, &false_label);
1954 break;
1955 }
1956
1957 // Convert the jumps into the result.
1958 NearLabel done_label;
1959
1960 // False case: result = 0.
1961 __ Bind(&false_label);
1962 __ xorl(reg, reg);
1963 __ jmp(&done_label);
1964
1965 // True case: result = 1.
1966 __ Bind(&true_label);
1967 __ movl(reg, Immediate(1));
1968 __ Bind(&done_label);
1969 }
1970
1971 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
1972 HandleCondition(comp);
1973 }
1974
1975 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
1976 HandleCondition(comp);
1977 }
1978
1979 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
1980 HandleCondition(comp);
1981 }
1982
1983 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
1984 HandleCondition(comp);
1985 }
1986
1987 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
1988 HandleCondition(comp);
1989 }
1990
1991 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
1992 HandleCondition(comp);
1993 }
1994
1995 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1996 HandleCondition(comp);
1997 }
1998
1999 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2000 HandleCondition(comp);
2001 }
2002
2003 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2004 HandleCondition(comp);
2005 }
2006
2007 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2008 HandleCondition(comp);
2009 }
2010
2011 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2012 HandleCondition(comp);
2013 }
2014
2015 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2016 HandleCondition(comp);
2017 }
2018
2019 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2020 HandleCondition(comp);
2021 }
2022
2023 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2024 HandleCondition(comp);
2025 }
2026
2027 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2028 HandleCondition(comp);
2029 }
2030
2031 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2032 HandleCondition(comp);
2033 }
2034
2035 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2036 HandleCondition(comp);
2037 }
2038
2039 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2040 HandleCondition(comp);
2041 }
2042
2043 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2044 HandleCondition(comp);
2045 }
2046
2047 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2048 HandleCondition(comp);
2049 }
2050
2051 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2052 LocationSummary* locations =
2053 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2054 locations->SetOut(Location::ConstantLocation(constant));
2055 }
2056
2057 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2058 // Will be generated at use site.
2059 }
2060
2061 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2062 LocationSummary* locations =
2063 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2064 locations->SetOut(Location::ConstantLocation(constant));
2065 }
2066
2067 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2068 // Will be generated at use site.
2069 }
2070
2071 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2072 LocationSummary* locations =
2073 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2074 locations->SetOut(Location::ConstantLocation(constant));
2075 }
2076
2077 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2078 // Will be generated at use site.
2079 }
2080
2081 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2082 LocationSummary* locations =
2083 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2084 locations->SetOut(Location::ConstantLocation(constant));
2085 }
2086
2087 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2088 // Will be generated at use site.
2089 }
2090
2091 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2092 LocationSummary* locations =
2093 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2094 locations->SetOut(Location::ConstantLocation(constant));
2095 }
2096
2097 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2098 // Will be generated at use site.
2099 }
2100
2101 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2102 constructor_fence->SetLocations(nullptr);
2103 }
2104
2105 void InstructionCodeGeneratorX86::VisitConstructorFence(
2106 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2107 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2108 }
2109
2110 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2111 memory_barrier->SetLocations(nullptr);
2112 }
2113
2114 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2115 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2116 }
2117
2118 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2119 ret->SetLocations(nullptr);
2120 }
2121
2122 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2123 codegen_->GenerateFrameExit();
2124 }
2125
2126 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2127 LocationSummary* locations =
2128 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2129 switch (ret->InputAt(0)->GetType()) {
2130 case DataType::Type::kReference:
2131 case DataType::Type::kBool:
2132 case DataType::Type::kUint8:
2133 case DataType::Type::kInt8:
2134 case DataType::Type::kUint16:
2135 case DataType::Type::kInt16:
2136 case DataType::Type::kInt32:
2137 locations->SetInAt(0, Location::RegisterLocation(EAX));
2138 break;
2139
2140 case DataType::Type::kInt64:
2141 locations->SetInAt(
2142 0, Location::RegisterPairLocation(EAX, EDX));
2143 break;
2144
2145 case DataType::Type::kFloat32:
2146 case DataType::Type::kFloat64:
2147 locations->SetInAt(
2148 0, Location::FpuRegisterLocation(XMM0));
2149 break;
2150
2151 default:
2152 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2153 }
2154 }
2155
2156 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2157 if (kIsDebugBuild) {
2158 switch (ret->InputAt(0)->GetType()) {
2159 case DataType::Type::kReference:
2160 case DataType::Type::kBool:
2161 case DataType::Type::kUint8:
2162 case DataType::Type::kInt8:
2163 case DataType::Type::kUint16:
2164 case DataType::Type::kInt16:
2165 case DataType::Type::kInt32:
2166 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2167 break;
2168
2169 case DataType::Type::kInt64:
2170 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2171 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2172 break;
2173
2174 case DataType::Type::kFloat32:
2175 case DataType::Type::kFloat64:
2176 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2177 break;
2178
2179 default:
2180 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2181 }
2182 }
2183 codegen_->GenerateFrameExit();
2184 }
2185
2186 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2187 // The trampoline uses the same calling convention as dex calling conventions,
2188 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2189 // the method_idx.
2190 HandleInvoke(invoke);
2191 }
2192
2193 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2194 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2195 }
2196
2197 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2198 // Explicit clinit checks triggered by static invokes must have been pruned by
2199 // art::PrepareForRegisterAllocation.
2200 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2201
2202 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2203 if (intrinsic.TryDispatch(invoke)) {
2204 if (invoke->GetLocations()->CanCall() &&
2205 invoke->HasPcRelativeMethodLoadKind() &&
2206 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2207 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2208 }
2209 return;
2210 }
2211
2212 HandleInvoke(invoke);
2213
2214 // For PC-relative method load kinds the invoke has an extra input, the PC-relative address base.
2215 if (invoke->HasPcRelativeMethodLoadKind()) {
2216 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2217 }
2218 }
2219
2220 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2221 if (invoke->GetLocations()->Intrinsified()) {
2222 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2223 intrinsic.Dispatch(invoke);
2224 return true;
2225 }
2226 return false;
2227 }
2228
2229 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2230 // Explicit clinit checks triggered by static invokes must have been pruned by
2231 // art::PrepareForRegisterAllocation.
2232 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2233
2234 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2235 return;
2236 }
2237
2238 LocationSummary* locations = invoke->GetLocations();
2239 codegen_->GenerateStaticOrDirectCall(
2240 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2241 }
2242
2243 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2244 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2245 if (intrinsic.TryDispatch(invoke)) {
2246 return;
2247 }
2248
2249 HandleInvoke(invoke);
2250 }
2251
2252 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2253 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2254 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2255 }
2256
2257 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2258 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2259 return;
2260 }
2261
2262 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2263 DCHECK(!codegen_->IsLeafMethod());
2264 }
2265
2266 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2267 // This call to HandleInvoke allocates a temporary (core) register
2268 // which is also used to transfer the hidden argument from a core
2269 // register into the FP register (XMM7).
2270 HandleInvoke(invoke);
2271 // Add the hidden argument.
2272 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2273 }
2274
2275 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2276 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2277 LocationSummary* locations = invoke->GetLocations();
2278 Register temp = locations->GetTemp(0).AsRegister<Register>();
2279 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2280 Location receiver = locations->InAt(0);
2281 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2282
2283 // Set the hidden argument. It is safe to do this here, as XMM7
2284 // won't be modified thereafter, before the `call` instruction.
2285 DCHECK_EQ(XMM7, hidden_reg);
2286 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2287 __ movd(hidden_reg, temp);
2288
2289 if (receiver.IsStackSlot()) {
2290 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2291 // /* HeapReference<Class> */ temp = temp->klass_
2292 __ movl(temp, Address(temp, class_offset));
2293 } else {
2294 // /* HeapReference<Class> */ temp = receiver->klass_
2295 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2296 }
2297 codegen_->MaybeRecordImplicitNullCheck(invoke);
2298 // Instead of simply (possibly) unpoisoning `temp` here, we should
2299 // emit a read barrier for the previous class reference load.
2300 // However this is not required in practice, as this is an
2301 // intermediate/temporary reference and because the current
2302 // concurrent copying collector keeps the from-space memory
2303 // intact/accessible until the end of the marking phase (the
2304 // concurrent copying collector may not in the future).
2305 __ MaybeUnpoisonHeapReference(temp);
2306 // temp = temp->GetAddressOfIMT()
2307 __ movl(temp,
2308 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2309 // temp = temp->GetImtEntryAt(method_offset);
2310 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2311 invoke->GetImtIndex(), kX86PointerSize));
2312 __ movl(temp, Address(temp, method_offset));
2313 // call temp->GetEntryPoint();
2314 __ call(Address(temp,
2315 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2316
2317 DCHECK(!codegen_->IsLeafMethod());
2318 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2319 }
2320
2321 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2322 HandleInvoke(invoke);
2323 }
2324
2325 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2326 codegen_->GenerateInvokePolymorphicCall(invoke);
2327 }
2328
2329 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2330 HandleInvoke(invoke);
2331 }
2332
2333 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2334 codegen_->GenerateInvokeCustomCall(invoke);
2335 }
2336
2337 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2338 LocationSummary* locations =
2339 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2340 switch (neg->GetResultType()) {
2341 case DataType::Type::kInt32:
2342 case DataType::Type::kInt64:
2343 locations->SetInAt(0, Location::RequiresRegister());
2344 locations->SetOut(Location::SameAsFirstInput());
2345 break;
2346
2347 case DataType::Type::kFloat32:
2348 locations->SetInAt(0, Location::RequiresFpuRegister());
2349 locations->SetOut(Location::SameAsFirstInput());
2350 locations->AddTemp(Location::RequiresRegister());
2351 locations->AddTemp(Location::RequiresFpuRegister());
2352 break;
2353
2354 case DataType::Type::kFloat64:
2355 locations->SetInAt(0, Location::RequiresFpuRegister());
2356 locations->SetOut(Location::SameAsFirstInput());
2357 locations->AddTemp(Location::RequiresFpuRegister());
2358 break;
2359
2360 default:
2361 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2362 }
2363 }
2364
2365 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2366 LocationSummary* locations = neg->GetLocations();
2367 Location out = locations->Out();
2368 Location in = locations->InAt(0);
2369 switch (neg->GetResultType()) {
2370 case DataType::Type::kInt32:
2371 DCHECK(in.IsRegister());
2372 DCHECK(in.Equals(out));
2373 __ negl(out.AsRegister<Register>());
2374 break;
2375
2376 case DataType::Type::kInt64:
2377 DCHECK(in.IsRegisterPair());
2378 DCHECK(in.Equals(out));
2379 __ negl(out.AsRegisterPairLow<Register>());
2380 // Negation is similar to subtraction from zero. The least
2381 // significant 32 bits trigger a borrow when they are different from
2382 // zero; to take that into account, add 1 to the most significant
2383 // 32 bits if the carry flag (CF) is set to 1 after the first NEGL
2384 // operation.
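// For example, negating 0x0000000100000000: negl leaves the low word 0 with CF == 0,
// adcl adds 0 to the high word (still 1), and the final negl turns it into 0xFFFFFFFF,
// giving 0xFFFFFFFF00000000 as expected.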
2385 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2386 __ negl(out.AsRegisterPairHigh<Register>());
2387 break;
2388
2389 case DataType::Type::kFloat32: {
2390 DCHECK(in.Equals(out));
2391 Register constant = locations->GetTemp(0).AsRegister<Register>();
2392 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2393 // Implement float negation with an exclusive or with value
2394 // 0x80000000 (mask for bit 31, representing the sign of a
2395 // single-precision floating-point number).
2396 __ movl(constant, Immediate(INT32_C(0x80000000)));
2397 __ movd(mask, constant);
2398 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2399 break;
2400 }
2401
2402 case DataType::Type::kFloat64: {
2403 DCHECK(in.Equals(out));
2404 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2405 // Implement double negation with an exclusive or with value
2406 // 0x8000000000000000 (mask for bit 63, representing the sign of
2407 // a double-precision floating-point number).
2408 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2409 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2410 break;
2411 }
2412
2413 default:
2414 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2415 }
2416 }
2417
2418 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2419 LocationSummary* locations =
2420 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2421 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2422 locations->SetInAt(0, Location::RequiresFpuRegister());
2423 locations->SetInAt(1, Location::RequiresRegister());
2424 locations->SetOut(Location::SameAsFirstInput());
2425 locations->AddTemp(Location::RequiresFpuRegister());
2426 }
2427
2428 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2429 LocationSummary* locations = neg->GetLocations();
2430 Location out = locations->Out();
2431 DCHECK(locations->InAt(0).Equals(out));
2432
2433 Register constant_area = locations->InAt(1).AsRegister<Register>();
2434 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
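// Unlike HNeg above, the sign-bit mask is loaded directly from the per-method constant
// area (addressed via `constant_area`) rather than materialized inline through a core
// register or stack pushes.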
2435 if (neg->GetType() == DataType::Type::kFloat32) {
2436 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2437 neg->GetBaseMethodAddress(),
2438 constant_area));
2439 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2440 } else {
2441 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2442 neg->GetBaseMethodAddress(),
2443 constant_area));
2444 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2445 }
2446 }
2447
2448 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2449 DataType::Type result_type = conversion->GetResultType();
2450 DataType::Type input_type = conversion->GetInputType();
2451 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2452 << input_type << " -> " << result_type;
2453
2454 // The float-to-long and double-to-long type conversions rely on a
2455 // call to the runtime.
2456 LocationSummary::CallKind call_kind =
2457 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2458 && result_type == DataType::Type::kInt64)
2459 ? LocationSummary::kCallOnMainOnly
2460 : LocationSummary::kNoCall;
2461 LocationSummary* locations =
2462 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2463
2464 switch (result_type) {
2465 case DataType::Type::kUint8:
2466 case DataType::Type::kInt8:
2467 switch (input_type) {
2468 case DataType::Type::kUint8:
2469 case DataType::Type::kInt8:
2470 case DataType::Type::kUint16:
2471 case DataType::Type::kInt16:
2472 case DataType::Type::kInt32:
2473 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2474 // Make the output overlap to please the register allocator. This greatly simplifies
2475 // the validation of the linear scan implementation.
2476 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2477 break;
2478 case DataType::Type::kInt64: {
2479 HInstruction* input = conversion->InputAt(0);
2480 Location input_location = input->IsConstant()
2481 ? Location::ConstantLocation(input->AsConstant())
2482 : Location::RegisterPairLocation(EAX, EDX);
2483 locations->SetInAt(0, input_location);
2484 // Make the output overlap to please the register allocator. This greatly simplifies
2485 // the validation of the linear scan implementation.
2486 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2487 break;
2488 }
2489
2490 default:
2491 LOG(FATAL) << "Unexpected type conversion from " << input_type
2492 << " to " << result_type;
2493 }
2494 break;
2495
2496 case DataType::Type::kUint16:
2497 case DataType::Type::kInt16:
2498 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2499 locations->SetInAt(0, Location::Any());
2500 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2501 break;
2502
2503 case DataType::Type::kInt32:
2504 switch (input_type) {
2505 case DataType::Type::kInt64:
2506 locations->SetInAt(0, Location::Any());
2507 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2508 break;
2509
2510 case DataType::Type::kFloat32:
2511 locations->SetInAt(0, Location::RequiresFpuRegister());
2512 locations->SetOut(Location::RequiresRegister());
2513 locations->AddTemp(Location::RequiresFpuRegister());
2514 break;
2515
2516 case DataType::Type::kFloat64:
2517 locations->SetInAt(0, Location::RequiresFpuRegister());
2518 locations->SetOut(Location::RequiresRegister());
2519 locations->AddTemp(Location::RequiresFpuRegister());
2520 break;
2521
2522 default:
2523 LOG(FATAL) << "Unexpected type conversion from " << input_type
2524 << " to " << result_type;
2525 }
2526 break;
2527
2528 case DataType::Type::kInt64:
2529 switch (input_type) {
2530 case DataType::Type::kBool:
2531 case DataType::Type::kUint8:
2532 case DataType::Type::kInt8:
2533 case DataType::Type::kUint16:
2534 case DataType::Type::kInt16:
2535 case DataType::Type::kInt32:
2536 locations->SetInAt(0, Location::RegisterLocation(EAX));
2537 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2538 break;
2539
2540 case DataType::Type::kFloat32:
2541 case DataType::Type::kFloat64: {
2542 InvokeRuntimeCallingConvention calling_convention;
2543 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2544 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2545
2546 // The runtime helper puts the result in EAX, EDX.
2547 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2548 }
2549 break;
2550
2551 default:
2552 LOG(FATAL) << "Unexpected type conversion from " << input_type
2553 << " to " << result_type;
2554 }
2555 break;
2556
2557 case DataType::Type::kFloat32:
2558 switch (input_type) {
2559 case DataType::Type::kBool:
2560 case DataType::Type::kUint8:
2561 case DataType::Type::kInt8:
2562 case DataType::Type::kUint16:
2563 case DataType::Type::kInt16:
2564 case DataType::Type::kInt32:
2565 locations->SetInAt(0, Location::RequiresRegister());
2566 locations->SetOut(Location::RequiresFpuRegister());
2567 break;
2568
2569 case DataType::Type::kInt64:
2570 locations->SetInAt(0, Location::Any());
2571 locations->SetOut(Location::Any());
2572 break;
2573
2574 case DataType::Type::kFloat64:
2575 locations->SetInAt(0, Location::RequiresFpuRegister());
2576 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2577 break;
2578
2579 default:
2580 LOG(FATAL) << "Unexpected type conversion from " << input_type
2581 << " to " << result_type;
2582 }
2583 break;
2584
2585 case DataType::Type::kFloat64:
2586 switch (input_type) {
2587 case DataType::Type::kBool:
2588 case DataType::Type::kUint8:
2589 case DataType::Type::kInt8:
2590 case DataType::Type::kUint16:
2591 case DataType::Type::kInt16:
2592 case DataType::Type::kInt32:
2593 locations->SetInAt(0, Location::RequiresRegister());
2594 locations->SetOut(Location::RequiresFpuRegister());
2595 break;
2596
2597 case DataType::Type::kInt64:
2598 locations->SetInAt(0, Location::Any());
2599 locations->SetOut(Location::Any());
2600 break;
2601
2602 case DataType::Type::kFloat32:
2603 locations->SetInAt(0, Location::RequiresFpuRegister());
2604 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2605 break;
2606
2607 default:
2608 LOG(FATAL) << "Unexpected type conversion from " << input_type
2609 << " to " << result_type;
2610 }
2611 break;
2612
2613 default:
2614 LOG(FATAL) << "Unexpected type conversion from " << input_type
2615 << " to " << result_type;
2616 }
2617 }
2618
2619 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2620 LocationSummary* locations = conversion->GetLocations();
2621 Location out = locations->Out();
2622 Location in = locations->InAt(0);
2623 DataType::Type result_type = conversion->GetResultType();
2624 DataType::Type input_type = conversion->GetInputType();
2625 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2626 << input_type << " -> " << result_type;
2627 switch (result_type) {
2628 case DataType::Type::kUint8:
2629 switch (input_type) {
2630 case DataType::Type::kInt8:
2631 case DataType::Type::kUint16:
2632 case DataType::Type::kInt16:
2633 case DataType::Type::kInt32:
2634 if (in.IsRegister()) {
2635 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2636 } else {
2637 DCHECK(in.GetConstant()->IsIntConstant());
2638 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2639 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2640 }
2641 break;
2642 case DataType::Type::kInt64:
2643 if (in.IsRegisterPair()) {
2644 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2645 } else {
2646 DCHECK(in.GetConstant()->IsLongConstant());
2647 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2648 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2649 }
2650 break;
2651
2652 default:
2653 LOG(FATAL) << "Unexpected type conversion from " << input_type
2654 << " to " << result_type;
2655 }
2656 break;
2657
2658 case DataType::Type::kInt8:
2659 switch (input_type) {
2660 case DataType::Type::kUint8:
2661 case DataType::Type::kUint16:
2662 case DataType::Type::kInt16:
2663 case DataType::Type::kInt32:
2664 if (in.IsRegister()) {
2665 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2666 } else {
2667 DCHECK(in.GetConstant()->IsIntConstant());
2668 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2669 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2670 }
2671 break;
2672 case DataType::Type::kInt64:
2673 if (in.IsRegisterPair()) {
2674 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2675 } else {
2676 DCHECK(in.GetConstant()->IsLongConstant());
2677 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2678 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2679 }
2680 break;
2681
2682 default:
2683 LOG(FATAL) << "Unexpected type conversion from " << input_type
2684 << " to " << result_type;
2685 }
2686 break;
2687
2688 case DataType::Type::kUint16:
2689 switch (input_type) {
2690 case DataType::Type::kInt8:
2691 case DataType::Type::kInt16:
2692 case DataType::Type::kInt32:
2693 if (in.IsRegister()) {
2694 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2695 } else if (in.IsStackSlot()) {
2696 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2697 } else {
2698 DCHECK(in.GetConstant()->IsIntConstant());
2699 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2700 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2701 }
2702 break;
2703 case DataType::Type::kInt64:
2704 if (in.IsRegisterPair()) {
2705 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2706 } else if (in.IsDoubleStackSlot()) {
2707 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2708 } else {
2709 DCHECK(in.GetConstant()->IsLongConstant());
2710 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2711 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2712 }
2713 break;
2714
2715 default:
2716 LOG(FATAL) << "Unexpected type conversion from " << input_type
2717 << " to " << result_type;
2718 }
2719 break;
2720
2721 case DataType::Type::kInt16:
2722 switch (input_type) {
2723 case DataType::Type::kUint16:
2724 case DataType::Type::kInt32:
2725 if (in.IsRegister()) {
2726 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2727 } else if (in.IsStackSlot()) {
2728 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2729 } else {
2730 DCHECK(in.GetConstant()->IsIntConstant());
2731 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2732 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2733 }
2734 break;
2735 case DataType::Type::kInt64:
2736 if (in.IsRegisterPair()) {
2737 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2738 } else if (in.IsDoubleStackSlot()) {
2739 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2740 } else {
2741 DCHECK(in.GetConstant()->IsLongConstant());
2742 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2743 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2744 }
2745 break;
2746
2747 default:
2748 LOG(FATAL) << "Unexpected type conversion from " << input_type
2749 << " to " << result_type;
2750 }
2751 break;
2752
2753 case DataType::Type::kInt32:
2754 switch (input_type) {
2755 case DataType::Type::kInt64:
2756 if (in.IsRegisterPair()) {
2757 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2758 } else if (in.IsDoubleStackSlot()) {
2759 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2760 } else {
2761 DCHECK(in.IsConstant());
2762 DCHECK(in.GetConstant()->IsLongConstant());
2763 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2764 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2765 }
2766 break;
2767
2768 case DataType::Type::kFloat32: {
2769 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2770 Register output = out.AsRegister<Register>();
2771 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2772 NearLabel done, nan;
2773
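// Java's (int) float conversion: values >= 2^31 saturate to Integer.MAX_VALUE, NaN
// becomes 0, and values below Integer.MIN_VALUE end up as Integer.MIN_VALUE because
// cvttss2si returns 0x80000000 for out-of-range inputs. Illustrative examples:
// 3.9f -> 3, 1e10f -> 0x7FFFFFFF, NaN -> 0, -1e10f -> 0x80000000. The double path
// below is analogous.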
2774 __ movl(output, Immediate(kPrimIntMax));
2775 // temp = int-to-float(output)
2776 __ cvtsi2ss(temp, output);
2777 // if input >= temp goto done
2778 __ comiss(input, temp);
2779 __ j(kAboveEqual, &done);
2780 // if input == NaN goto nan
2781 __ j(kUnordered, &nan);
2782 // output = float-to-int-truncate(input)
2783 __ cvttss2si(output, input);
2784 __ jmp(&done);
2785 __ Bind(&nan);
2786 // output = 0
2787 __ xorl(output, output);
2788 __ Bind(&done);
2789 break;
2790 }
2791
2792 case DataType::Type::kFloat64: {
2793 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2794 Register output = out.AsRegister<Register>();
2795 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2796 NearLabel done, nan;
2797
2798 __ movl(output, Immediate(kPrimIntMax));
2799 // temp = int-to-double(output)
2800 __ cvtsi2sd(temp, output);
2801 // if input >= temp goto done
2802 __ comisd(input, temp);
2803 __ j(kAboveEqual, &done);
2804 // if input == NaN goto nan
2805 __ j(kUnordered, &nan);
2806 // output = double-to-int-truncate(input)
2807 __ cvttsd2si(output, input);
2808 __ jmp(&done);
2809 __ Bind(&nan);
2810 // output = 0
2811 __ xorl(output, output);
2812 __ Bind(&done);
2813 break;
2814 }
2815
2816 default:
2817 LOG(FATAL) << "Unexpected type conversion from " << input_type
2818 << " to " << result_type;
2819 }
2820 break;
2821
2822 case DataType::Type::kInt64:
2823 switch (input_type) {
2824 case DataType::Type::kBool:
2825 case DataType::Type::kUint8:
2826 case DataType::Type::kInt8:
2827 case DataType::Type::kUint16:
2828 case DataType::Type::kInt16:
2829 case DataType::Type::kInt32:
2830 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2831 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2832 DCHECK_EQ(in.AsRegister<Register>(), EAX);
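// cdq sign-extends EAX into EDX, e.g. EAX = -1 yields EDX:EAX = 0xFFFFFFFF:0xFFFFFFFF.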
2833 __ cdq();
2834 break;
2835
2836 case DataType::Type::kFloat32:
2837 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2838 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2839 break;
2840
2841 case DataType::Type::kFloat64:
2842 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2843 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2844 break;
2845
2846 default:
2847 LOG(FATAL) << "Unexpected type conversion from " << input_type
2848 << " to " << result_type;
2849 }
2850 break;
2851
2852 case DataType::Type::kFloat32:
2853 switch (input_type) {
2854 case DataType::Type::kBool:
2855 case DataType::Type::kUint8:
2856 case DataType::Type::kInt8:
2857 case DataType::Type::kUint16:
2858 case DataType::Type::kInt16:
2859 case DataType::Type::kInt32:
2860 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2861 break;
2862
2863 case DataType::Type::kInt64: {
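// Note: 32-bit x86 has no SSE instruction that converts a 64-bit integer to a
// floating-point value, so the conversion is routed through the x87 FPU (fild + fstp).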
2864 size_t adjustment = 0;
2865
2866 // Create stack space for the call to
2867 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2868 // TODO: enhance register allocator to ask for stack temporaries.
2869 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2870 adjustment = DataType::Size(DataType::Type::kInt64);
2871 __ subl(ESP, Immediate(adjustment));
2872 }
2873
2874 // Load the value to the FP stack, using temporaries if needed.
2875 PushOntoFPStack(in, 0, adjustment, false, true);
2876
2877 if (out.IsStackSlot()) {
2878 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2879 } else {
2880 __ fstps(Address(ESP, 0));
2881 Location stack_temp = Location::StackSlot(0);
2882 codegen_->Move32(out, stack_temp);
2883 }
2884
2885 // Remove the temporary stack space we allocated.
2886 if (adjustment != 0) {
2887 __ addl(ESP, Immediate(adjustment));
2888 }
2889 break;
2890 }
2891
2892 case DataType::Type::kFloat64:
2893 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2894 break;
2895
2896 default:
2897 LOG(FATAL) << "Unexpected type conversion from " << input_type
2898 << " to " << result_type;
2899 }
2900 break;
2901
2902 case DataType::Type::kFloat64:
2903 switch (input_type) {
2904 case DataType::Type::kBool:
2905 case DataType::Type::kUint8:
2906 case DataType::Type::kInt8:
2907 case DataType::Type::kUint16:
2908 case DataType::Type::kInt16:
2909 case DataType::Type::kInt32:
2910 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2911 break;
2912
2913 case DataType::Type::kInt64: {
2914 size_t adjustment = 0;
2915
2916 // Create stack space for the call to
2917 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
2918 // TODO: enhance register allocator to ask for stack temporaries.
2919 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
2920 adjustment = DataType::Size(DataType::Type::kInt64);
2921 __ subl(ESP, Immediate(adjustment));
2922 }
2923
2924 // Load the value to the FP stack, using temporaries if needed.
2925 PushOntoFPStack(in, 0, adjustment, false, true);
2926
2927 if (out.IsDoubleStackSlot()) {
2928 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
2929 } else {
2930 __ fstpl(Address(ESP, 0));
2931 Location stack_temp = Location::DoubleStackSlot(0);
2932 codegen_->Move64(out, stack_temp);
2933 }
2934
2935 // Remove the temporary stack space we allocated.
2936 if (adjustment != 0) {
2937 __ addl(ESP, Immediate(adjustment));
2938 }
2939 break;
2940 }
2941
2942 case DataType::Type::kFloat32:
2943 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2944 break;
2945
2946 default:
2947 LOG(FATAL) << "Unexpected type conversion from " << input_type
2948 << " to " << result_type;
2949 }
2950 break;
2951
2952 default:
2953 LOG(FATAL) << "Unexpected type conversion from " << input_type
2954 << " to " << result_type;
2955 }
2956 }
2957
VisitAdd(HAdd * add)2958 void LocationsBuilderX86::VisitAdd(HAdd* add) {
2959 LocationSummary* locations =
2960 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
2961 switch (add->GetResultType()) {
2962 case DataType::Type::kInt32: {
2963 locations->SetInAt(0, Location::RequiresRegister());
2964 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2965 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2966 break;
2967 }
2968
2969 case DataType::Type::kInt64: {
2970 locations->SetInAt(0, Location::RequiresRegister());
2971 locations->SetInAt(1, Location::Any());
2972 locations->SetOut(Location::SameAsFirstInput());
2973 break;
2974 }
2975
2976 case DataType::Type::kFloat32:
2977 case DataType::Type::kFloat64: {
2978 locations->SetInAt(0, Location::RequiresFpuRegister());
2979 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
2980 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
2981 } else if (add->InputAt(1)->IsConstant()) {
2982 locations->SetInAt(1, Location::RequiresFpuRegister());
2983 } else {
2984 locations->SetInAt(1, Location::Any());
2985 }
2986 locations->SetOut(Location::SameAsFirstInput());
2987 break;
2988 }
2989
2990 default:
2991 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2992 UNREACHABLE();
2993 }
2994 }
2995
VisitAdd(HAdd * add)2996 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
2997 LocationSummary* locations = add->GetLocations();
2998 Location first = locations->InAt(0);
2999 Location second = locations->InAt(1);
3000 Location out = locations->Out();
3001
3002 switch (add->GetResultType()) {
3003 case DataType::Type::kInt32: {
3004 if (second.IsRegister()) {
3005 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3006 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3007 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3008 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3009 } else {
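// The output aliases neither input; use LEA as a non-destructive
// three-operand add: out = first + second.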
3010 __ leal(out.AsRegister<Register>(), Address(
3011 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3012 }
3013 } else if (second.IsConstant()) {
3014 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3015 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3016 __ addl(out.AsRegister<Register>(), Immediate(value));
3017 } else {
3018 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3019 }
3020 } else {
3021 DCHECK(first.Equals(locations->Out()));
3022 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3023 }
3024 break;
3025 }
3026
3027 case DataType::Type::kInt64: {
3028 if (second.IsRegisterPair()) {
3029 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3030 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3031 } else if (second.IsDoubleStackSlot()) {
3032 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3033 __ adcl(first.AsRegisterPairHigh<Register>(),
3034 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3035 } else {
3036 DCHECK(second.IsConstant()) << second;
3037 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3038 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3039 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3040 }
3041 break;
3042 }
3043
3044 case DataType::Type::kFloat32: {
3045 if (second.IsFpuRegister()) {
3046 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3047 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3048 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3049 DCHECK(const_area->IsEmittedAtUseSite());
3050 __ addss(first.AsFpuRegister<XmmRegister>(),
3051 codegen_->LiteralFloatAddress(
3052 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3053 const_area->GetBaseMethodAddress(),
3054 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3055 } else {
3056 DCHECK(second.IsStackSlot());
3057 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3058 }
3059 break;
3060 }
3061
3062 case DataType::Type::kFloat64: {
3063 if (second.IsFpuRegister()) {
3064 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3065 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3066 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3067 DCHECK(const_area->IsEmittedAtUseSite());
3068 __ addsd(first.AsFpuRegister<XmmRegister>(),
3069 codegen_->LiteralDoubleAddress(
3070 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3071 const_area->GetBaseMethodAddress(),
3072 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3073 } else {
3074 DCHECK(second.IsDoubleStackSlot());
3075 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3076 }
3077 break;
3078 }
3079
3080 default:
3081 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3082 }
3083 }
3084
VisitSub(HSub * sub)3085 void LocationsBuilderX86::VisitSub(HSub* sub) {
3086 LocationSummary* locations =
3087 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3088 switch (sub->GetResultType()) {
3089 case DataType::Type::kInt32:
3090 case DataType::Type::kInt64: {
3091 locations->SetInAt(0, Location::RequiresRegister());
3092 locations->SetInAt(1, Location::Any());
3093 locations->SetOut(Location::SameAsFirstInput());
3094 break;
3095 }
3096 case DataType::Type::kFloat32:
3097 case DataType::Type::kFloat64: {
3098 locations->SetInAt(0, Location::RequiresFpuRegister());
3099 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3100 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3101 } else if (sub->InputAt(1)->IsConstant()) {
3102 locations->SetInAt(1, Location::RequiresFpuRegister());
3103 } else {
3104 locations->SetInAt(1, Location::Any());
3105 }
3106 locations->SetOut(Location::SameAsFirstInput());
3107 break;
3108 }
3109
3110 default:
3111 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3112 }
3113 }
3114
VisitSub(HSub * sub)3115 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3116 LocationSummary* locations = sub->GetLocations();
3117 Location first = locations->InAt(0);
3118 Location second = locations->InAt(1);
3119 DCHECK(first.Equals(locations->Out()));
3120 switch (sub->GetResultType()) {
3121 case DataType::Type::kInt32: {
3122 if (second.IsRegister()) {
3123 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3124 } else if (second.IsConstant()) {
3125 __ subl(first.AsRegister<Register>(),
3126 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3127 } else {
3128 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3129 }
3130 break;
3131 }
3132
3133 case DataType::Type::kInt64: {
3134 if (second.IsRegisterPair()) {
3135 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3136 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3137 } else if (second.IsDoubleStackSlot()) {
3138 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3139 __ sbbl(first.AsRegisterPairHigh<Register>(),
3140 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3141 } else {
3142 DCHECK(second.IsConstant()) << second;
3143 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3144 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3145 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3146 }
3147 break;
3148 }
3149
3150 case DataType::Type::kFloat32: {
3151 if (second.IsFpuRegister()) {
3152 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3153 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3154 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3155 DCHECK(const_area->IsEmittedAtUseSite());
3156 __ subss(first.AsFpuRegister<XmmRegister>(),
3157 codegen_->LiteralFloatAddress(
3158 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3159 const_area->GetBaseMethodAddress(),
3160 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3161 } else {
3162 DCHECK(second.IsStackSlot());
3163 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3164 }
3165 break;
3166 }
3167
3168 case DataType::Type::kFloat64: {
3169 if (second.IsFpuRegister()) {
3170 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3171 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3172 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3173 DCHECK(const_area->IsEmittedAtUseSite());
3174 __ subsd(first.AsFpuRegister<XmmRegister>(),
3175 codegen_->LiteralDoubleAddress(
3176 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3177 const_area->GetBaseMethodAddress(),
3178 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3179 } else {
3180 DCHECK(second.IsDoubleStackSlot());
3181 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3182 }
3183 break;
3184 }
3185
3186 default:
3187 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3188 }
3189 }
3190
VisitMul(HMul * mul)3191 void LocationsBuilderX86::VisitMul(HMul* mul) {
3192 LocationSummary* locations =
3193 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3194 switch (mul->GetResultType()) {
3195 case DataType::Type::kInt32:
3196 locations->SetInAt(0, Location::RequiresRegister());
3197 locations->SetInAt(1, Location::Any());
3198 if (mul->InputAt(1)->IsIntConstant()) {
3199 // Can use 3 operand multiply.
3200 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3201 } else {
3202 locations->SetOut(Location::SameAsFirstInput());
3203 }
3204 break;
3205 case DataType::Type::kInt64: {
3206 locations->SetInAt(0, Location::RequiresRegister());
3207 locations->SetInAt(1, Location::Any());
3208 locations->SetOut(Location::SameAsFirstInput());
3209       // Needed for imul on 32 bits with a 64-bit output.
3210 locations->AddTemp(Location::RegisterLocation(EAX));
3211 locations->AddTemp(Location::RegisterLocation(EDX));
3212 break;
3213 }
3214 case DataType::Type::kFloat32:
3215 case DataType::Type::kFloat64: {
3216 locations->SetInAt(0, Location::RequiresFpuRegister());
3217 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3218 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3219 } else if (mul->InputAt(1)->IsConstant()) {
3220 locations->SetInAt(1, Location::RequiresFpuRegister());
3221 } else {
3222 locations->SetInAt(1, Location::Any());
3223 }
3224 locations->SetOut(Location::SameAsFirstInput());
3225 break;
3226 }
3227
3228 default:
3229 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3230 }
3231 }
3232
VisitMul(HMul * mul)3233 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3234 LocationSummary* locations = mul->GetLocations();
3235 Location first = locations->InAt(0);
3236 Location second = locations->InAt(1);
3237 Location out = locations->Out();
3238
3239 switch (mul->GetResultType()) {
3240 case DataType::Type::kInt32:
3241 // The constant may have ended up in a register, so test explicitly to avoid
3242 // problems where the output may not be the same as the first operand.
3243 if (mul->InputAt(1)->IsIntConstant()) {
3244 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3245 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3246 } else if (second.IsRegister()) {
3247 DCHECK(first.Equals(out));
3248 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3249 } else {
3250 DCHECK(second.IsStackSlot());
3251 DCHECK(first.Equals(out));
3252 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3253 }
3254 break;
3255
3256 case DataType::Type::kInt64: {
3257 Register in1_hi = first.AsRegisterPairHigh<Register>();
3258 Register in1_lo = first.AsRegisterPairLow<Register>();
3259 Register eax = locations->GetTemp(0).AsRegister<Register>();
3260 Register edx = locations->GetTemp(1).AsRegister<Register>();
3261
3262 DCHECK_EQ(EAX, eax);
3263 DCHECK_EQ(EDX, edx);
3264
3265 // input: in1 - 64 bits, in2 - 64 bits.
3266 // output: in1
3267       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo) * 2^32 + in1.lo * in2.lo
3268 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3269 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
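// Illustrative example: in1 = 0x0000000100000002, in2 = 0x0000000300000004.
//   cross terms: in1.lo * in2.hi + in1.hi * in2.lo = 2*3 + 1*4 = 10;
//   in1.lo * in2.lo = 8, whose high 32 bits are 0;
//   low 64 bits of the product = 10 * 2^32 + 8 = 0x0000000A00000008.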
3270 if (second.IsConstant()) {
3271 DCHECK(second.GetConstant()->IsLongConstant());
3272
3273 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3274 int32_t low_value = Low32Bits(value);
3275 int32_t high_value = High32Bits(value);
3276 Immediate low(low_value);
3277 Immediate high(high_value);
3278
3279 __ movl(eax, high);
3280 // eax <- in1.lo * in2.hi
3281 __ imull(eax, in1_lo);
3282 // in1.hi <- in1.hi * in2.lo
3283 __ imull(in1_hi, low);
3284 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3285 __ addl(in1_hi, eax);
3286         // move in2_lo to eax to prepare for the widening 32x32->64 multiply
3287 __ movl(eax, low);
3288 // edx:eax <- in1.lo * in2.lo
3289 __ mull(in1_lo);
3290 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3291 __ addl(in1_hi, edx);
3292 // in1.lo <- (in1.lo * in2.lo)[31:0];
3293 __ movl(in1_lo, eax);
3294 } else if (second.IsRegisterPair()) {
3295 Register in2_hi = second.AsRegisterPairHigh<Register>();
3296 Register in2_lo = second.AsRegisterPairLow<Register>();
3297
3298 __ movl(eax, in2_hi);
3299 // eax <- in1.lo * in2.hi
3300 __ imull(eax, in1_lo);
3301 // in1.hi <- in1.hi * in2.lo
3302 __ imull(in1_hi, in2_lo);
3303 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3304 __ addl(in1_hi, eax);
3305         // move in1_lo to eax to prepare for the widening 32x32->64 multiply
3306 __ movl(eax, in1_lo);
3307 // edx:eax <- in1.lo * in2.lo
3308 __ mull(in2_lo);
3309 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3310 __ addl(in1_hi, edx);
3311 // in1.lo <- (in1.lo * in2.lo)[31:0];
3312 __ movl(in1_lo, eax);
3313 } else {
3314 DCHECK(second.IsDoubleStackSlot()) << second;
3315 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3316 Address in2_lo(ESP, second.GetStackIndex());
3317
3318 __ movl(eax, in2_hi);
3319 // eax <- in1.lo * in2.hi
3320 __ imull(eax, in1_lo);
3321 // in1.hi <- in1.hi * in2.lo
3322 __ imull(in1_hi, in2_lo);
3323 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3324 __ addl(in1_hi, eax);
3325         // move in1_lo to eax to prepare for the widening 32x32->64 multiply
3326 __ movl(eax, in1_lo);
3327 // edx:eax <- in1.lo * in2.lo
3328 __ mull(in2_lo);
3329 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3330 __ addl(in1_hi, edx);
3331 // in1.lo <- (in1.lo * in2.lo)[31:0];
3332 __ movl(in1_lo, eax);
3333 }
3334
3335 break;
3336 }
3337
3338 case DataType::Type::kFloat32: {
3339 DCHECK(first.Equals(locations->Out()));
3340 if (second.IsFpuRegister()) {
3341 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3342 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3343 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3344 DCHECK(const_area->IsEmittedAtUseSite());
3345 __ mulss(first.AsFpuRegister<XmmRegister>(),
3346 codegen_->LiteralFloatAddress(
3347 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3348 const_area->GetBaseMethodAddress(),
3349 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3350 } else {
3351 DCHECK(second.IsStackSlot());
3352 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3353 }
3354 break;
3355 }
3356
3357 case DataType::Type::kFloat64: {
3358 DCHECK(first.Equals(locations->Out()));
3359 if (second.IsFpuRegister()) {
3360 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3361 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3362 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3363 DCHECK(const_area->IsEmittedAtUseSite());
3364 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3365 codegen_->LiteralDoubleAddress(
3366 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3367 const_area->GetBaseMethodAddress(),
3368 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3369 } else {
3370 DCHECK(second.IsDoubleStackSlot());
3371 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3372 }
3373 break;
3374 }
3375
3376 default:
3377 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3378 }
3379 }
3380
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3381 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3382 uint32_t temp_offset,
3383 uint32_t stack_adjustment,
3384 bool is_fp,
3385 bool is_wide) {
3386 if (source.IsStackSlot()) {
3387 DCHECK(!is_wide);
3388 if (is_fp) {
3389 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3390 } else {
3391 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3392 }
3393 } else if (source.IsDoubleStackSlot()) {
3394 DCHECK(is_wide);
3395 if (is_fp) {
3396 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3397 } else {
3398 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3399 }
3400 } else {
3401 // Write the value to the temporary location on the stack and load to FP stack.
3402 if (!is_wide) {
3403 Location stack_temp = Location::StackSlot(temp_offset);
3404 codegen_->Move32(stack_temp, source);
3405 if (is_fp) {
3406 __ flds(Address(ESP, temp_offset));
3407 } else {
3408 __ filds(Address(ESP, temp_offset));
3409 }
3410 } else {
3411 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3412 codegen_->Move64(stack_temp, source);
3413 if (is_fp) {
3414 __ fldl(Address(ESP, temp_offset));
3415 } else {
3416 __ fildl(Address(ESP, temp_offset));
3417 }
3418 }
3419 }
3420 }
3421
GenerateRemFP(HRem * rem)3422 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3423 DataType::Type type = rem->GetResultType();
3424 bool is_float = type == DataType::Type::kFloat32;
3425 size_t elem_size = DataType::Size(type);
3426 LocationSummary* locations = rem->GetLocations();
3427 Location first = locations->InAt(0);
3428 Location second = locations->InAt(1);
3429 Location out = locations->Out();
3430
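  // SSE has no floating-point remainder instruction, so the operands go through the
  // x87 FPU and FPREM. FPREM yields a remainder with the sign of the dividend
  // (e.g. -5.5 % 2.0 == -1.5), matching Java semantics, and may be only partial:
  // it reduces the exponent difference by at most 63 per execution, hence the retry
  // loop below.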
3431 // Create stack space for 2 elements.
3432 // TODO: enhance register allocator to ask for stack temporaries.
3433 __ subl(ESP, Immediate(2 * elem_size));
3434
3435 // Load the values to the FP stack in reverse order, using temporaries if needed.
3436 const bool is_wide = !is_float;
3437 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3438 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3439
3440 // Loop doing FPREM until we stabilize.
3441 NearLabel retry;
3442 __ Bind(&retry);
3443 __ fprem();
3444
3445 // Move FP status to AX.
3446 __ fstsw();
3447
3448   // Check whether the argument reduction is complete, which is signaled by the
3449   // C2 FPU flag being cleared (0).
3450 __ andl(EAX, Immediate(kC2ConditionMask));
3451 __ j(kNotEqual, &retry);
3452
3453 // We have settled on the final value. Retrieve it into an XMM register.
3454 // Store FP top of stack to real stack.
3455 if (is_float) {
3456 __ fsts(Address(ESP, 0));
3457 } else {
3458 __ fstl(Address(ESP, 0));
3459 }
3460
3461 // Pop the 2 items from the FP stack.
3462 __ fucompp();
3463
3464 // Load the value from the stack into an XMM register.
3465 DCHECK(out.IsFpuRegister()) << out;
3466 if (is_float) {
3467 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3468 } else {
3469 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3470 }
3471
3472 // And remove the temporary stack space we allocated.
3473 __ addl(ESP, Immediate(2 * elem_size));
3474 }
3475
3476
DivRemOneOrMinusOne(HBinaryOperation * instruction)3477 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3478 DCHECK(instruction->IsDiv() || instruction->IsRem());
3479
3480 LocationSummary* locations = instruction->GetLocations();
3481 DCHECK(locations->InAt(1).IsConstant());
3482 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3483
3484 Register out_register = locations->Out().AsRegister<Register>();
3485 Register input_register = locations->InAt(0).AsRegister<Register>();
3486 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3487
3488 DCHECK(imm == 1 || imm == -1);
3489
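  // x % 1 == x % -1 == 0; x / 1 == x and x / -1 == -x (negating INT_MIN wraps back
  // to INT_MIN, matching Java's overflow behavior).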
3490 if (instruction->IsRem()) {
3491 __ xorl(out_register, out_register);
3492 } else {
3493 __ movl(out_register, input_register);
3494 if (imm == -1) {
3495 __ negl(out_register);
3496 }
3497 }
3498 }
3499
RemByPowerOfTwo(HRem * instruction)3500 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3501 LocationSummary* locations = instruction->GetLocations();
3502 Location second = locations->InAt(1);
3503
3504 Register out = locations->Out().AsRegister<Register>();
3505 Register numerator = locations->InAt(0).AsRegister<Register>();
3506
3507 int32_t imm = Int64FromConstant(second.GetConstant());
3508 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3509 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3510
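  // Illustrative example for numerator = -7, imm = 4 (abs_imm = 4):
  //   out = -7 & 3          -> 1 (non-zero, so no early exit)
  //   tmp = out + ~(4 - 1)  -> 1 - 4 = -3
  //   numerator < 0, so out = tmp = -3, which matches Java's -7 % 4 == -3.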
3511 Register tmp = locations->GetTemp(0).AsRegister<Register>();
3512 NearLabel done;
3513 __ movl(out, numerator);
3514   __ andl(out, Immediate(abs_imm - 1));
3515   __ j(Condition::kZero, &done);
3516   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
3517 __ testl(numerator, numerator);
3518 __ cmovl(Condition::kLess, out, tmp);
3519 __ Bind(&done);
3520 }
3521
DivByPowerOfTwo(HDiv * instruction)3522 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3523 LocationSummary* locations = instruction->GetLocations();
3524
3525 Register out_register = locations->Out().AsRegister<Register>();
3526 Register input_register = locations->InAt(0).AsRegister<Register>();
3527 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3528 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3529 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3530
3531 Register num = locations->GetTemp(0).AsRegister<Register>();
3532
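  // Round toward zero: for a negative dividend, add (abs_imm - 1) before the arithmetic
  // shift. Illustrative example for imm = 4: input = -7 gives num = -7 + 3 = -4, kept
  // since input < 0, then -4 >> 2 = -1 (= -7 / 4 in Java); input = 7 gives num = 10,
  // replaced by 7 since input >= 0, then 7 >> 2 = 1.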
3533 __ leal(num, Address(input_register, abs_imm - 1));
3534 __ testl(input_register, input_register);
3535 __ cmovl(kGreaterEqual, num, input_register);
3536 int shift = CTZ(imm);
3537 __ sarl(num, Immediate(shift));
3538
3539 if (imm < 0) {
3540 __ negl(num);
3541 }
3542
3543 __ movl(out_register, num);
3544 }
3545
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3546 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3547 DCHECK(instruction->IsDiv() || instruction->IsRem());
3548
3549 LocationSummary* locations = instruction->GetLocations();
3550 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3551
3552 Register eax = locations->InAt(0).AsRegister<Register>();
3553 Register out = locations->Out().AsRegister<Register>();
3554 Register num;
3555 Register edx;
3556
3557 if (instruction->IsDiv()) {
3558 edx = locations->GetTemp(0).AsRegister<Register>();
3559 num = locations->GetTemp(1).AsRegister<Register>();
3560 } else {
3561 edx = locations->Out().AsRegister<Register>();
3562 num = locations->GetTemp(0).AsRegister<Register>();
3563 }
3564
3565 DCHECK_EQ(EAX, eax);
3566 DCHECK_EQ(EDX, edx);
3567 if (instruction->IsDiv()) {
3568 DCHECK_EQ(EAX, out);
3569 } else {
3570 DCHECK_EQ(EDX, out);
3571 }
3572
3573 int64_t magic;
3574 int shift;
3575 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3576
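  // The division is implemented as a multiply by a "magic" constant plus a shift, as in
  // Hacker's Delight, section 10-4. Illustrative walk-through, assuming the helper picks
  // the usual constants for imm = 7 (magic = 0x92492493, negative as int32, shift = 2)
  // and numerator n = 100:
  //   EDX:EAX = magic * n, so EDX = -43 (high 32 bits of the product)
  //   imm > 0 and magic < 0, so EDX += n  -> 57
  //   EDX >>= shift                       -> 14
  //   EDX += 1 only if EDX < 0            -> 14 == 100 / 7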
3577 // Save the numerator.
3578 __ movl(num, eax);
3579
3580 // EAX = magic
3581 __ movl(eax, Immediate(magic));
3582
3583 // EDX:EAX = magic * numerator
3584 __ imull(num);
3585
3586 if (imm > 0 && magic < 0) {
3587 // EDX += num
3588 __ addl(edx, num);
3589 } else if (imm < 0 && magic > 0) {
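    // EDX -= num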
3590 __ subl(edx, num);
3591 }
3592
3593 // Shift if needed.
3594 if (shift != 0) {
3595 __ sarl(edx, Immediate(shift));
3596 }
3597
3598 // EDX += 1 if EDX < 0
3599 __ movl(eax, edx);
3600 __ shrl(edx, Immediate(31));
3601 __ addl(edx, eax);
3602
3603 if (instruction->IsRem()) {
3604 __ movl(eax, num);
3605 __ imull(edx, Immediate(imm));
3606 __ subl(eax, edx);
3607 __ movl(edx, eax);
3608 } else {
3609 __ movl(eax, edx);
3610 }
3611 }
3612
GenerateDivRemIntegral(HBinaryOperation * instruction)3613 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3614 DCHECK(instruction->IsDiv() || instruction->IsRem());
3615
3616 LocationSummary* locations = instruction->GetLocations();
3617 Location out = locations->Out();
3618 Location first = locations->InAt(0);
3619 Location second = locations->InAt(1);
3620 bool is_div = instruction->IsDiv();
3621
3622 switch (instruction->GetResultType()) {
3623 case DataType::Type::kInt32: {
3624 DCHECK_EQ(EAX, first.AsRegister<Register>());
3625 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3626
3627 if (second.IsConstant()) {
3628 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3629
3630 if (imm == 0) {
3631 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3632 } else if (imm == 1 || imm == -1) {
3633 DivRemOneOrMinusOne(instruction);
3634 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3635 if (is_div) {
3636 DivByPowerOfTwo(instruction->AsDiv());
3637 } else {
3638 RemByPowerOfTwo(instruction->AsRem());
3639 }
3640 } else {
3641 DCHECK(imm <= -2 || imm >= 2);
3642 GenerateDivRemWithAnyConstant(instruction);
3643 }
3644 } else {
3645 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3646 instruction, out.AsRegister<Register>(), is_div);
3647 codegen_->AddSlowPath(slow_path);
3648
3649 Register second_reg = second.AsRegister<Register>();
3650 // 0x80000000/-1 triggers an arithmetic exception!
3651         // Dividing by -1 is actually negation, and -0x80000000 == 0x80000000, so
3652 // it's safe to just use negl instead of more complex comparisons.
3653
3654 __ cmpl(second_reg, Immediate(-1));
3655 __ j(kEqual, slow_path->GetEntryLabel());
3656
3657 // edx:eax <- sign-extended of eax
3658 __ cdq();
3659 // eax = quotient, edx = remainder
3660 __ idivl(second_reg);
3661 __ Bind(slow_path->GetExitLabel());
3662 }
3663 break;
3664 }
3665
3666 case DataType::Type::kInt64: {
3667 InvokeRuntimeCallingConvention calling_convention;
3668 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3669 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3670 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3671 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3672 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3673 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3674
3675 if (is_div) {
3676 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3677 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3678 } else {
3679 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3680 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3681 }
3682 break;
3683 }
3684
3685 default:
3686 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3687 }
3688 }
3689
VisitDiv(HDiv * div)3690 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3691 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3692 ? LocationSummary::kCallOnMainOnly
3693 : LocationSummary::kNoCall;
3694 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3695
3696 switch (div->GetResultType()) {
3697 case DataType::Type::kInt32: {
3698 locations->SetInAt(0, Location::RegisterLocation(EAX));
3699 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3700 locations->SetOut(Location::SameAsFirstInput());
3701 // Intel uses edx:eax as the dividend.
3702 locations->AddTemp(Location::RegisterLocation(EDX));
3703 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3704 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3705 // output and request another temp.
3706 if (div->InputAt(1)->IsIntConstant()) {
3707 locations->AddTemp(Location::RequiresRegister());
3708 }
3709 break;
3710 }
3711 case DataType::Type::kInt64: {
3712 InvokeRuntimeCallingConvention calling_convention;
3713 locations->SetInAt(0, Location::RegisterPairLocation(
3714 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3715 locations->SetInAt(1, Location::RegisterPairLocation(
3716 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3717 // Runtime helper puts the result in EAX, EDX.
3718 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3719 break;
3720 }
3721 case DataType::Type::kFloat32:
3722 case DataType::Type::kFloat64: {
3723 locations->SetInAt(0, Location::RequiresFpuRegister());
3724 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3725 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3726 } else if (div->InputAt(1)->IsConstant()) {
3727 locations->SetInAt(1, Location::RequiresFpuRegister());
3728 } else {
3729 locations->SetInAt(1, Location::Any());
3730 }
3731 locations->SetOut(Location::SameAsFirstInput());
3732 break;
3733 }
3734
3735 default:
3736 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3737 }
3738 }
3739
VisitDiv(HDiv * div)3740 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3741 LocationSummary* locations = div->GetLocations();
3742 Location first = locations->InAt(0);
3743 Location second = locations->InAt(1);
3744
3745 switch (div->GetResultType()) {
3746 case DataType::Type::kInt32:
3747 case DataType::Type::kInt64: {
3748 GenerateDivRemIntegral(div);
3749 break;
3750 }
3751
3752 case DataType::Type::kFloat32: {
3753 if (second.IsFpuRegister()) {
3754 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3755 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3756 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3757 DCHECK(const_area->IsEmittedAtUseSite());
3758 __ divss(first.AsFpuRegister<XmmRegister>(),
3759 codegen_->LiteralFloatAddress(
3760 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3761 const_area->GetBaseMethodAddress(),
3762 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3763 } else {
3764 DCHECK(second.IsStackSlot());
3765 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3766 }
3767 break;
3768 }
3769
3770 case DataType::Type::kFloat64: {
3771 if (second.IsFpuRegister()) {
3772 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3773 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3774 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3775 DCHECK(const_area->IsEmittedAtUseSite());
3776 __ divsd(first.AsFpuRegister<XmmRegister>(),
3777 codegen_->LiteralDoubleAddress(
3778 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3779 const_area->GetBaseMethodAddress(),
3780 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3781 } else {
3782 DCHECK(second.IsDoubleStackSlot());
3783 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3784 }
3785 break;
3786 }
3787
3788 default:
3789 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3790 }
3791 }
3792
VisitRem(HRem * rem)3793 void LocationsBuilderX86::VisitRem(HRem* rem) {
3794 DataType::Type type = rem->GetResultType();
3795
3796 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3797 ? LocationSummary::kCallOnMainOnly
3798 : LocationSummary::kNoCall;
3799 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3800
3801 switch (type) {
3802 case DataType::Type::kInt32: {
3803 locations->SetInAt(0, Location::RegisterLocation(EAX));
3804 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3805 locations->SetOut(Location::RegisterLocation(EDX));
3806 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3807 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3808 // output and request another temp.
3809 if (rem->InputAt(1)->IsIntConstant()) {
3810 locations->AddTemp(Location::RequiresRegister());
3811 }
3812 break;
3813 }
3814 case DataType::Type::kInt64: {
3815 InvokeRuntimeCallingConvention calling_convention;
3816 locations->SetInAt(0, Location::RegisterPairLocation(
3817 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3818 locations->SetInAt(1, Location::RegisterPairLocation(
3819 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3820 // Runtime helper puts the result in EAX, EDX.
3821 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3822 break;
3823 }
3824 case DataType::Type::kFloat64:
3825 case DataType::Type::kFloat32: {
3826 locations->SetInAt(0, Location::Any());
3827 locations->SetInAt(1, Location::Any());
3828 locations->SetOut(Location::RequiresFpuRegister());
3829 locations->AddTemp(Location::RegisterLocation(EAX));
3830 break;
3831 }
3832
3833 default:
3834 LOG(FATAL) << "Unexpected rem type " << type;
3835 }
3836 }
3837
VisitRem(HRem * rem)3838 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3839 DataType::Type type = rem->GetResultType();
3840 switch (type) {
3841 case DataType::Type::kInt32:
3842 case DataType::Type::kInt64: {
3843 GenerateDivRemIntegral(rem);
3844 break;
3845 }
3846 case DataType::Type::kFloat32:
3847 case DataType::Type::kFloat64: {
3848 GenerateRemFP(rem);
3849 break;
3850 }
3851 default:
3852 LOG(FATAL) << "Unexpected rem type " << type;
3853 }
3854 }
3855
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)3856 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
3857 LocationSummary* locations = new (allocator) LocationSummary(minmax);
3858 switch (minmax->GetResultType()) {
3859 case DataType::Type::kInt32:
3860 locations->SetInAt(0, Location::RequiresRegister());
3861 locations->SetInAt(1, Location::RequiresRegister());
3862 locations->SetOut(Location::SameAsFirstInput());
3863 break;
3864 case DataType::Type::kInt64:
3865 locations->SetInAt(0, Location::RequiresRegister());
3866 locations->SetInAt(1, Location::RequiresRegister());
3867 locations->SetOut(Location::SameAsFirstInput());
3868 // Register to use to perform a long subtract to set cc.
3869 locations->AddTemp(Location::RequiresRegister());
3870 break;
3871 case DataType::Type::kFloat32:
3872 locations->SetInAt(0, Location::RequiresFpuRegister());
3873 locations->SetInAt(1, Location::RequiresFpuRegister());
3874 locations->SetOut(Location::SameAsFirstInput());
3875 locations->AddTemp(Location::RequiresRegister());
3876 break;
3877 case DataType::Type::kFloat64:
3878 locations->SetInAt(0, Location::RequiresFpuRegister());
3879 locations->SetInAt(1, Location::RequiresFpuRegister());
3880 locations->SetOut(Location::SameAsFirstInput());
3881 break;
3882 default:
3883 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
3884 }
3885 }
3886
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)3887 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
3888 bool is_min,
3889 DataType::Type type) {
3890 Location op1_loc = locations->InAt(0);
3891 Location op2_loc = locations->InAt(1);
3892
3893 // Shortcut for same input locations.
3894 if (op1_loc.Equals(op2_loc)) {
3895 // Can return immediately, as op1_loc == out_loc.
3896 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
3897 // a copy here.
3898 DCHECK(locations->Out().Equals(op1_loc));
3899 return;
3900 }
3901
3902 if (type == DataType::Type::kInt64) {
3903 // Need to perform a subtract to get the sign right.
3904 // op1 is already in the same location as the output.
3905 Location output = locations->Out();
3906 Register output_lo = output.AsRegisterPairLow<Register>();
3907 Register output_hi = output.AsRegisterPairHigh<Register>();
3908
3909 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
3910 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
3911
3912 // The comparison is performed by subtracting the second operand from
3913 // the first operand and then setting the status flags in the same
3914     // manner as the SUB instruction.
3915 __ cmpl(output_lo, op2_lo);
3916
3917 // Now use a temp and the borrow to finish the subtraction of op2_hi.
3918 Register temp = locations->GetTemp(0).AsRegister<Register>();
3919 __ movl(temp, output_hi);
3920 __ sbbl(temp, op2_hi);
3921
3922 // Now the condition code is correct.
3923 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
3924 __ cmovl(cond, output_lo, op2_lo);
3925 __ cmovl(cond, output_hi, op2_hi);
3926 } else {
3927 DCHECK_EQ(type, DataType::Type::kInt32);
3928 Register out = locations->Out().AsRegister<Register>();
3929 Register op2 = op2_loc.AsRegister<Register>();
3930
3931 // (out := op1)
3932 // out <=? op2
3933 // if out is min jmp done
3934 // out := op2
3935 // done:
3936
3937 __ cmpl(out, op2);
3938 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
3939 __ cmovl(cond, out, op2);
3940 }
3941 }
3942
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)3943 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
3944 bool is_min,
3945 DataType::Type type) {
3946 Location op1_loc = locations->InAt(0);
3947 Location op2_loc = locations->InAt(1);
3948 Location out_loc = locations->Out();
3949 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
3950
3951 // Shortcut for same input locations.
3952 if (op1_loc.Equals(op2_loc)) {
3953 DCHECK(out_loc.Equals(op1_loc));
3954 return;
3955 }
3956
3957 // (out := op1)
3958 // out <=? op2
3959 // if Nan jmp Nan_label
3960 // if out is min jmp done
3961 // if op2 is min jmp op2_label
3962 // handle -0/+0
3963 // jmp done
3964 // Nan_label:
3965 // out := NaN
3966 // op2_label:
3967 // out := op2
3968 // done:
3969 //
3970 // This removes one jmp, but needs to copy one input (op1) to out.
3971 //
3972 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
3973
3974 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
3975
3976 NearLabel nan, done, op2_label;
3977 if (type == DataType::Type::kFloat64) {
3978 __ ucomisd(out, op2);
3979 } else {
3980 DCHECK_EQ(type, DataType::Type::kFloat32);
3981 __ ucomiss(out, op2);
3982 }
3983
3984 __ j(Condition::kParityEven, &nan);
3985
3986 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
3987 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
3988
3989 // Handle 0.0/-0.0.
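  // +0.0 and -0.0 compare equal, so neither branch above was taken. Merge the sign
  // bits instead: OR yields -0.0 if either operand is -0.0 (the right answer for min),
  // AND yields +0.0 unless both operands are -0.0 (the right answer for max).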
3990 if (is_min) {
3991 if (type == DataType::Type::kFloat64) {
3992 __ orpd(out, op2);
3993 } else {
3994 __ orps(out, op2);
3995 }
3996 } else {
3997 if (type == DataType::Type::kFloat64) {
3998 __ andpd(out, op2);
3999 } else {
4000 __ andps(out, op2);
4001 }
4002 }
4003 __ jmp(&done);
4004
4005 // NaN handling.
4006 __ Bind(&nan);
4007 if (type == DataType::Type::kFloat64) {
4008 // TODO: Use a constant from the constant table (requires extra input).
4009 __ LoadLongConstant(out, kDoubleNaN);
4010 } else {
4011 Register constant = locations->GetTemp(0).AsRegister<Register>();
4012 __ movl(constant, Immediate(kFloatNaN));
4013 __ movd(out, constant);
4014 }
4015 __ jmp(&done);
4016
4017 // out := op2;
4018 __ Bind(&op2_label);
4019 if (type == DataType::Type::kFloat64) {
4020 __ movsd(out, op2);
4021 } else {
4022 __ movss(out, op2);
4023 }
4024
4025 // Done.
4026 __ Bind(&done);
4027 }
4028
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4029 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4030 DataType::Type type = minmax->GetResultType();
4031 switch (type) {
4032 case DataType::Type::kInt32:
4033 case DataType::Type::kInt64:
4034 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4035 break;
4036 case DataType::Type::kFloat32:
4037 case DataType::Type::kFloat64:
4038 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4039 break;
4040 default:
4041 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4042 }
4043 }
4044
VisitMin(HMin * min)4045 void LocationsBuilderX86::VisitMin(HMin* min) {
4046 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4047 }
4048
VisitMin(HMin * min)4049 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4050 GenerateMinMax(min, /*is_min*/ true);
4051 }
4052
VisitMax(HMax * max)4053 void LocationsBuilderX86::VisitMax(HMax* max) {
4054 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4055 }
4056
VisitMax(HMax * max)4057 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4058 GenerateMinMax(max, /*is_min*/ false);
4059 }
4060
VisitAbs(HAbs * abs)4061 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4062 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4063 switch (abs->GetResultType()) {
4064 case DataType::Type::kInt32:
4065 locations->SetInAt(0, Location::RegisterLocation(EAX));
4066 locations->SetOut(Location::SameAsFirstInput());
4067 locations->AddTemp(Location::RegisterLocation(EDX));
4068 break;
4069 case DataType::Type::kInt64:
4070 locations->SetInAt(0, Location::RequiresRegister());
4071 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4072 locations->AddTemp(Location::RequiresRegister());
4073 break;
4074 case DataType::Type::kFloat32:
4075 locations->SetInAt(0, Location::RequiresFpuRegister());
4076 locations->SetOut(Location::SameAsFirstInput());
4077 locations->AddTemp(Location::RequiresFpuRegister());
4078 locations->AddTemp(Location::RequiresRegister());
4079 break;
4080 case DataType::Type::kFloat64:
4081 locations->SetInAt(0, Location::RequiresFpuRegister());
4082 locations->SetOut(Location::SameAsFirstInput());
4083 locations->AddTemp(Location::RequiresFpuRegister());
4084 break;
4085 default:
4086 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4087 }
4088 }
4089
VisitAbs(HAbs * abs)4090 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4091 LocationSummary* locations = abs->GetLocations();
4092 switch (abs->GetResultType()) {
4093 case DataType::Type::kInt32: {
4094 Register out = locations->Out().AsRegister<Register>();
4095 DCHECK_EQ(out, EAX);
4096 Register temp = locations->GetTemp(0).AsRegister<Register>();
4097 DCHECK_EQ(temp, EDX);
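      // Branchless abs: with sign = x >> 31, |x| == (x ^ sign) - sign. For example,
      // x = -5: sign = -1, (-5 ^ -1) == 4, 4 - (-1) == 5. abs(INT_MIN) wraps to
      // INT_MIN, matching Java's Math.abs.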
4098 // Sign extend EAX into EDX.
4099 __ cdq();
4100 // XOR EAX with sign.
4101 __ xorl(EAX, EDX);
4102 // Subtract out sign to correct.
4103 __ subl(EAX, EDX);
4104 // The result is in EAX.
4105 break;
4106 }
4107 case DataType::Type::kInt64: {
4108 Location input = locations->InAt(0);
4109 Register input_lo = input.AsRegisterPairLow<Register>();
4110 Register input_hi = input.AsRegisterPairHigh<Register>();
4111 Location output = locations->Out();
4112 Register output_lo = output.AsRegisterPairLow<Register>();
4113 Register output_hi = output.AsRegisterPairHigh<Register>();
4114 Register temp = locations->GetTemp(0).AsRegister<Register>();
4115 // Compute the sign into the temporary.
4116 __ movl(temp, input_hi);
4117 __ sarl(temp, Immediate(31));
4118 // Store the sign into the output.
4119 __ movl(output_lo, temp);
4120 __ movl(output_hi, temp);
4121 // XOR the input to the output.
4122 __ xorl(output_lo, input_lo);
4123 __ xorl(output_hi, input_hi);
4124 // Subtract the sign.
4125 __ subl(output_lo, temp);
4126 __ sbbl(output_hi, temp);
4127 break;
4128 }
4129 case DataType::Type::kFloat32: {
4130 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4131 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4132 Register constant = locations->GetTemp(1).AsRegister<Register>();
4133 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4134 __ movd(temp, constant);
4135 __ andps(out, temp);
4136 break;
4137 }
4138 case DataType::Type::kFloat64: {
4139 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4140 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4141 // TODO: Use a constant from the constant table (requires extra input).
4142 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4143 __ andpd(out, temp);
4144 break;
4145 }
4146 default:
4147 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4148 }
4149 }
4150
VisitDivZeroCheck(HDivZeroCheck * instruction)4151 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4152 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4153 switch (instruction->GetType()) {
4154 case DataType::Type::kBool:
4155 case DataType::Type::kUint8:
4156 case DataType::Type::kInt8:
4157 case DataType::Type::kUint16:
4158 case DataType::Type::kInt16:
4159 case DataType::Type::kInt32: {
4160 locations->SetInAt(0, Location::Any());
4161 break;
4162 }
4163 case DataType::Type::kInt64: {
4164 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4165 if (!instruction->IsConstant()) {
4166 locations->AddTemp(Location::RequiresRegister());
4167 }
4168 break;
4169 }
4170 default:
4171 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4172 }
4173 }
4174
VisitDivZeroCheck(HDivZeroCheck * instruction)4175 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4176 SlowPathCode* slow_path =
4177 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4178 codegen_->AddSlowPath(slow_path);
4179
4180 LocationSummary* locations = instruction->GetLocations();
4181 Location value = locations->InAt(0);
4182
4183 switch (instruction->GetType()) {
4184 case DataType::Type::kBool:
4185 case DataType::Type::kUint8:
4186 case DataType::Type::kInt8:
4187 case DataType::Type::kUint16:
4188 case DataType::Type::kInt16:
4189 case DataType::Type::kInt32: {
4190 if (value.IsRegister()) {
4191 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4192 __ j(kEqual, slow_path->GetEntryLabel());
4193 } else if (value.IsStackSlot()) {
4194 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4195 __ j(kEqual, slow_path->GetEntryLabel());
4196 } else {
4197 DCHECK(value.IsConstant()) << value;
4198 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4199 __ jmp(slow_path->GetEntryLabel());
4200 }
4201 }
4202 break;
4203 }
4204 case DataType::Type::kInt64: {
4205 if (value.IsRegisterPair()) {
4206 Register temp = locations->GetTemp(0).AsRegister<Register>();
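        // The 64-bit value is zero iff both halves are zero; OR-ing them into a temp
        // sets ZF exactly in that case.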
4207 __ movl(temp, value.AsRegisterPairLow<Register>());
4208 __ orl(temp, value.AsRegisterPairHigh<Register>());
4209 __ j(kEqual, slow_path->GetEntryLabel());
4210 } else {
4211 DCHECK(value.IsConstant()) << value;
4212 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4213 __ jmp(slow_path->GetEntryLabel());
4214 }
4215 }
4216 break;
4217 }
4218 default:
4219       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4220 }
4221 }
4222
HandleShift(HBinaryOperation * op)4223 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4224 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4225
4226 LocationSummary* locations =
4227 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4228
4229 switch (op->GetResultType()) {
4230 case DataType::Type::kInt32:
4231 case DataType::Type::kInt64: {
4232       // The first input can't be Location::Any() because the output is SameAsFirstInput().
4233 locations->SetInAt(0, Location::RequiresRegister());
4234 // The shift count needs to be in CL or a constant.
4235 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4236 locations->SetOut(Location::SameAsFirstInput());
4237 break;
4238 }
4239 default:
4240 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4241 }
4242 }
4243
HandleShift(HBinaryOperation * op)4244 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4245 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4246
4247 LocationSummary* locations = op->GetLocations();
4248 Location first = locations->InAt(0);
4249 Location second = locations->InAt(1);
4250 DCHECK(first.Equals(locations->Out()));
4251
4252 switch (op->GetResultType()) {
4253 case DataType::Type::kInt32: {
4254 DCHECK(first.IsRegister());
4255 Register first_reg = first.AsRegister<Register>();
4256 if (second.IsRegister()) {
4257 Register second_reg = second.AsRegister<Register>();
4258 DCHECK_EQ(ECX, second_reg);
4259 if (op->IsShl()) {
4260 __ shll(first_reg, second_reg);
4261 } else if (op->IsShr()) {
4262 __ sarl(first_reg, second_reg);
4263 } else {
4264 __ shrl(first_reg, second_reg);
4265 }
4266 } else {
4267 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4268 if (shift == 0) {
4269 return;
4270 }
4271 Immediate imm(shift);
4272 if (op->IsShl()) {
4273 __ shll(first_reg, imm);
4274 } else if (op->IsShr()) {
4275 __ sarl(first_reg, imm);
4276 } else {
4277 __ shrl(first_reg, imm);
4278 }
4279 }
4280 break;
4281 }
4282 case DataType::Type::kInt64: {
4283 if (second.IsRegister()) {
4284 Register second_reg = second.AsRegister<Register>();
4285 DCHECK_EQ(ECX, second_reg);
4286 if (op->IsShl()) {
4287 GenerateShlLong(first, second_reg);
4288 } else if (op->IsShr()) {
4289 GenerateShrLong(first, second_reg);
4290 } else {
4291 GenerateUShrLong(first, second_reg);
4292 }
4293 } else {
4294 // Shift by a constant.
4295 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4296 // Nothing to do if the shift is 0, as the input is already the output.
4297 if (shift != 0) {
4298 if (op->IsShl()) {
4299 GenerateShlLong(first, shift);
4300 } else if (op->IsShr()) {
4301 GenerateShrLong(first, shift);
4302 } else {
4303 GenerateUShrLong(first, shift);
4304 }
4305 }
4306 }
4307 break;
4308 }
4309 default:
4310 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4311 }
4312 }
4313
4314 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4315 Register low = loc.AsRegisterPairLow<Register>();
4316 Register high = loc.AsRegisterPairHigh<Register>();
4317 if (shift == 1) {
4318 // This is just an addition.
4319 __ addl(low, low);
4320 __ adcl(high, high);
4321 } else if (shift == 32) {
4322 // Shift by 32 is easy. High gets low, and low gets 0.
4323 codegen_->EmitParallelMoves(
4324 loc.ToLow(),
4325 loc.ToHigh(),
4326 DataType::Type::kInt32,
4327 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4328 loc.ToLow(),
4329 DataType::Type::kInt32);
4330 } else if (shift > 32) {
4331 // Low part becomes 0. High part is low part << (shift-32).
4332 __ movl(high, low);
4333 __ shll(high, Immediate(shift - 32));
4334 __ xorl(low, low);
4335 } else {
4336 // Between 1 and 31.
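// shld shifts `high` left by `shift` while filling its vacated low-order
// bits with the top `shift` bits of `low`; the shll below then shifts
// `low` itself, completing the 64-bit left shift.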
4337 __ shld(high, low, Immediate(shift));
4338 __ shll(low, Immediate(shift));
4339 }
4340 }
4341
4342 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4343 NearLabel done;
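// The hardware masks a variable shift count to its low 5 bits, so the
// shld/shll pair below only handles counts 0-31. When bit 5 of the count
// is set (a shift of 32-63), the code after the test fixes up the result:
// the high word receives the shifted low word and the low word becomes 0.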
4344 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4345 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4346 __ testl(shifter, Immediate(32));
4347 __ j(kEqual, &done);
4348 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4349 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4350 __ Bind(&done);
4351 }
4352
4353 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4354 Register low = loc.AsRegisterPairLow<Register>();
4355 Register high = loc.AsRegisterPairHigh<Register>();
4356 if (shift == 32) {
4357 // Need to copy the sign.
4358 DCHECK_NE(low, high);
4359 __ movl(low, high);
4360 __ sarl(high, Immediate(31));
4361 } else if (shift > 32) {
4362 DCHECK_NE(low, high);
4363 // High part becomes sign. Low part is shifted by shift - 32.
4364 __ movl(low, high);
4365 __ sarl(high, Immediate(31));
4366 __ sarl(low, Immediate(shift - 32));
4367 } else {
4368 // Between 1 and 31.
4369 __ shrd(low, high, Immediate(shift));
4370 __ sarl(high, Immediate(shift));
4371 }
4372 }
4373
4374 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4375 NearLabel done;
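// Same scheme as GenerateShlLong above: shrd/sarl cover counts 0-31, and
// when bit 5 of the count is set the low word receives the high word and
// the high word is filled with the sign bit.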
4376 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4377 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4378 __ testl(shifter, Immediate(32));
4379 __ j(kEqual, &done);
4380 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4381 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4382 __ Bind(&done);
4383 }
4384
4385 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4386 Register low = loc.AsRegisterPairLow<Register>();
4387 Register high = loc.AsRegisterPairHigh<Register>();
4388 if (shift == 32) {
4389 // Shift by 32 is easy. Low gets high, and high gets 0.
4390 codegen_->EmitParallelMoves(
4391 loc.ToHigh(),
4392 loc.ToLow(),
4393 DataType::Type::kInt32,
4394 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4395 loc.ToHigh(),
4396 DataType::Type::kInt32);
4397 } else if (shift > 32) {
4398 // Low part is high >> (shift - 32). High part becomes 0.
4399 __ movl(low, high);
4400 __ shrl(low, Immediate(shift - 32));
4401 __ xorl(high, high);
4402 } else {
4403 // Between 1 and 31.
4404 __ shrd(low, high, Immediate(shift));
4405 __ shrl(high, Immediate(shift));
4406 }
4407 }
4408
4409 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4410 NearLabel done;
4411 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4412 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4413 __ testl(shifter, Immediate(32));
4414 __ j(kEqual, &done);
4415 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4416 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4417 __ Bind(&done);
4418 }
4419
4420 void LocationsBuilderX86::VisitRor(HRor* ror) {
4421 LocationSummary* locations =
4422 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4423
4424 switch (ror->GetResultType()) {
4425 case DataType::Type::kInt64:
4426 // Add the temporary needed.
4427 locations->AddTemp(Location::RequiresRegister());
4428 FALLTHROUGH_INTENDED;
4429 case DataType::Type::kInt32:
4430 locations->SetInAt(0, Location::RequiresRegister());
4431 // The shift count needs to be in CL (unless it is a constant).
4432 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4433 locations->SetOut(Location::SameAsFirstInput());
4434 break;
4435 default:
4436 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4437 UNREACHABLE();
4438 }
4439 }
4440
4441 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4442 LocationSummary* locations = ror->GetLocations();
4443 Location first = locations->InAt(0);
4444 Location second = locations->InAt(1);
4445
4446 if (ror->GetResultType() == DataType::Type::kInt32) {
4447 Register first_reg = first.AsRegister<Register>();
4448 if (second.IsRegister()) {
4449 Register second_reg = second.AsRegister<Register>();
4450 __ rorl(first_reg, second_reg);
4451 } else {
4452 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4453 __ rorl(first_reg, imm);
4454 }
4455 return;
4456 }
4457
4458 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4459 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4460 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4461 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4462 if (second.IsRegister()) {
4463 Register second_reg = second.AsRegister<Register>();
4464 DCHECK_EQ(second_reg, ECX);
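// Rotate the 64-bit pair right by ECX: each shrd shifts one half right
// while feeding in bits from the other half, and the cmovs afterwards swap
// the halves when bit 5 of the count is set (rotation amount >= 32).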
4465 __ movl(temp_reg, first_reg_hi);
4466 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4467 __ shrd(first_reg_lo, temp_reg, second_reg);
4468 __ movl(temp_reg, first_reg_hi);
4469 __ testl(second_reg, Immediate(32));
4470 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4471 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4472 } else {
4473 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4474 if (shift_amt == 0) {
4475 // Already fine.
4476 return;
4477 }
4478 if (shift_amt == 32) {
4479 // Just swap.
4480 __ movl(temp_reg, first_reg_lo);
4481 __ movl(first_reg_lo, first_reg_hi);
4482 __ movl(first_reg_hi, temp_reg);
4483 return;
4484 }
4485
4486 Immediate imm(shift_amt);
4487 // Save the contents of the low value.
4488 __ movl(temp_reg, first_reg_lo);
4489
4490 // Shift right into low, feeding bits from high.
4491 __ shrd(first_reg_lo, first_reg_hi, imm);
4492
4493 // Shift right into high, feeding bits from the original low.
4494 __ shrd(first_reg_hi, temp_reg, imm);
4495
4496 // Swap if needed.
4497 if (shift_amt > 32) {
4498 __ movl(temp_reg, first_reg_lo);
4499 __ movl(first_reg_lo, first_reg_hi);
4500 __ movl(first_reg_hi, temp_reg);
4501 }
4502 }
4503 }
4504
4505 void LocationsBuilderX86::VisitShl(HShl* shl) {
4506 HandleShift(shl);
4507 }
4508
4509 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4510 HandleShift(shl);
4511 }
4512
4513 void LocationsBuilderX86::VisitShr(HShr* shr) {
4514 HandleShift(shr);
4515 }
4516
4517 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4518 HandleShift(shr);
4519 }
4520
4521 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4522 HandleShift(ushr);
4523 }
4524
4525 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4526 HandleShift(ushr);
4527 }
4528
4529 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4530 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4531 instruction, LocationSummary::kCallOnMainOnly);
4532 locations->SetOut(Location::RegisterLocation(EAX));
4533 InvokeRuntimeCallingConvention calling_convention;
4534 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4535 }
4536
4537 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4538 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4539 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4540 DCHECK(!codegen_->IsLeafMethod());
4541 }
4542
4543 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4544 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4545 instruction, LocationSummary::kCallOnMainOnly);
4546 locations->SetOut(Location::RegisterLocation(EAX));
4547 InvokeRuntimeCallingConvention calling_convention;
4548 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4549 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4550 }
4551
4552 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4553 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4554 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4555 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4556 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4557 DCHECK(!codegen_->IsLeafMethod());
4558 }
4559
4560 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4561 LocationSummary* locations =
4562 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4563 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4564 if (location.IsStackSlot()) {
4565 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4566 } else if (location.IsDoubleStackSlot()) {
4567 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4568 }
4569 locations->SetOut(location);
4570 }
4571
4572 void InstructionCodeGeneratorX86::VisitParameterValue(
4573 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4574 }
4575
4576 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4577 LocationSummary* locations =
4578 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4579 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4580 }
4581
4582 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4583 }
4584
4585 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4586 LocationSummary* locations =
4587 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4588 locations->SetInAt(0, Location::RequiresRegister());
4589 locations->SetOut(Location::RequiresRegister());
4590 }
4591
4592 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4593 LocationSummary* locations = instruction->GetLocations();
4594 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4595 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4596 instruction->GetIndex(), kX86PointerSize).SizeValue();
4597 __ movl(locations->Out().AsRegister<Register>(),
4598 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4599 } else {
4600 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4601 instruction->GetIndex(), kX86PointerSize));
4602 __ movl(locations->Out().AsRegister<Register>(),
4603 Address(locations->InAt(0).AsRegister<Register>(),
4604 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4605 // temp = temp->GetImtEntryAt(method_offset);
4606 __ movl(locations->Out().AsRegister<Register>(),
4607 Address(locations->Out().AsRegister<Register>(), method_offset));
4608 }
4609 }
4610
4611 void LocationsBuilderX86::VisitNot(HNot* not_) {
4612 LocationSummary* locations =
4613 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4614 locations->SetInAt(0, Location::RequiresRegister());
4615 locations->SetOut(Location::SameAsFirstInput());
4616 }
4617
4618 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4619 LocationSummary* locations = not_->GetLocations();
4620 Location in = locations->InAt(0);
4621 Location out = locations->Out();
4622 DCHECK(in.Equals(out));
4623 switch (not_->GetResultType()) {
4624 case DataType::Type::kInt32:
4625 __ notl(out.AsRegister<Register>());
4626 break;
4627
4628 case DataType::Type::kInt64:
4629 __ notl(out.AsRegisterPairLow<Register>());
4630 __ notl(out.AsRegisterPairHigh<Register>());
4631 break;
4632
4633 default:
4634 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4635 }
4636 }
4637
4638 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4639 LocationSummary* locations =
4640 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4641 locations->SetInAt(0, Location::RequiresRegister());
4642 locations->SetOut(Location::SameAsFirstInput());
4643 }
4644
4645 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4646 LocationSummary* locations = bool_not->GetLocations();
4647 Location in = locations->InAt(0);
4648 Location out = locations->Out();
4649 DCHECK(in.Equals(out));
4650 __ xorl(out.AsRegister<Register>(), Immediate(1));
4651 }
4652
4653 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4654 LocationSummary* locations =
4655 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4656 switch (compare->InputAt(0)->GetType()) {
4657 case DataType::Type::kBool:
4658 case DataType::Type::kUint8:
4659 case DataType::Type::kInt8:
4660 case DataType::Type::kUint16:
4661 case DataType::Type::kInt16:
4662 case DataType::Type::kInt32:
4663 case DataType::Type::kInt64: {
4664 locations->SetInAt(0, Location::RequiresRegister());
4665 locations->SetInAt(1, Location::Any());
4666 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4667 break;
4668 }
4669 case DataType::Type::kFloat32:
4670 case DataType::Type::kFloat64: {
4671 locations->SetInAt(0, Location::RequiresFpuRegister());
4672 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4673 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4674 } else if (compare->InputAt(1)->IsConstant()) {
4675 locations->SetInAt(1, Location::RequiresFpuRegister());
4676 } else {
4677 locations->SetInAt(1, Location::Any());
4678 }
4679 locations->SetOut(Location::RequiresRegister());
4680 break;
4681 }
4682 default:
4683 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4684 }
4685 }
4686
4687 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4688 LocationSummary* locations = compare->GetLocations();
4689 Register out = locations->Out().AsRegister<Register>();
4690 Location left = locations->InAt(0);
4691 Location right = locations->InAt(1);
4692
4693 NearLabel less, greater, done;
4694 Condition less_cond = kLess;
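// The generated code materializes -1, 0 or 1 in `out`: fall through to 0 on
// equality, jump to `greater` for 1 and to `less` for -1. `less_cond` is
// switched to kBelow where the deciding comparison leaves its "less" result
// in the carry flag (the unsigned low-word compare and the FP compares).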
4695
4696 switch (compare->InputAt(0)->GetType()) {
4697 case DataType::Type::kBool:
4698 case DataType::Type::kUint8:
4699 case DataType::Type::kInt8:
4700 case DataType::Type::kUint16:
4701 case DataType::Type::kInt16:
4702 case DataType::Type::kInt32: {
4703 codegen_->GenerateIntCompare(left, right);
4704 break;
4705 }
4706 case DataType::Type::kInt64: {
4707 Register left_low = left.AsRegisterPairLow<Register>();
4708 Register left_high = left.AsRegisterPairHigh<Register>();
4709 int32_t val_low = 0;
4710 int32_t val_high = 0;
4711 bool right_is_const = false;
4712
4713 if (right.IsConstant()) {
4714 DCHECK(right.GetConstant()->IsLongConstant());
4715 right_is_const = true;
4716 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4717 val_low = Low32Bits(val);
4718 val_high = High32Bits(val);
4719 }
4720
4721 if (right.IsRegisterPair()) {
4722 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4723 } else if (right.IsDoubleStackSlot()) {
4724 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4725 } else {
4726 DCHECK(right_is_const) << right;
4727 codegen_->Compare32BitValue(left_high, val_high);
4728 }
4729 __ j(kLess, &less); // Signed compare.
4730 __ j(kGreater, &greater); // Signed compare.
4731 if (right.IsRegisterPair()) {
4732 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4733 } else if (right.IsDoubleStackSlot()) {
4734 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4735 } else {
4736 DCHECK(right_is_const) << right;
4737 codegen_->Compare32BitValue(left_low, val_low);
4738 }
4739 less_cond = kBelow; // for CF (unsigned).
4740 break;
4741 }
4742 case DataType::Type::kFloat32: {
4743 GenerateFPCompare(left, right, compare, false);
4744 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4745 less_cond = kBelow; // for CF (floats).
4746 break;
4747 }
4748 case DataType::Type::kFloat64: {
4749 GenerateFPCompare(left, right, compare, true);
4750 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4751 less_cond = kBelow; // for CF (floats).
4752 break;
4753 }
4754 default:
4755 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4756 }
4757
4758 __ movl(out, Immediate(0));
4759 __ j(kEqual, &done);
4760 __ j(less_cond, &less);
4761
4762 __ Bind(&greater);
4763 __ movl(out, Immediate(1));
4764 __ jmp(&done);
4765
4766 __ Bind(&less);
4767 __ movl(out, Immediate(-1));
4768
4769 __ Bind(&done);
4770 }
4771
4772 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4773 LocationSummary* locations =
4774 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4775 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4776 locations->SetInAt(i, Location::Any());
4777 }
4778 locations->SetOut(Location::Any());
4779 }
4780
4781 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4782 LOG(FATAL) << "Unreachable";
4783 }
4784
4785 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4786 /*
4787 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
4788 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4789 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4790 */
4791 switch (kind) {
4792 case MemBarrierKind::kAnyAny: {
4793 MemoryFence();
4794 break;
4795 }
4796 case MemBarrierKind::kAnyStore:
4797 case MemBarrierKind::kLoadAny:
4798 case MemBarrierKind::kStoreStore: {
4799 // nop
4800 break;
4801 }
4802 case MemBarrierKind::kNTStoreStore:
4803 // Non-Temporal Store/Store needs an explicit fence.
4804 MemoryFence(/* non-temporal= */ true);
4805 break;
4806 }
4807 }
4808
4809 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
4810 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4811 ArtMethod* method ATTRIBUTE_UNUSED) {
4812 return desired_dispatch_info;
4813 }
4814
4815 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4816 Register temp) {
4817 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4818 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4819 if (!invoke->GetLocations()->Intrinsified()) {
4820 return location.AsRegister<Register>();
4821 }
4822 // For intrinsics we allow any location, so it may be on the stack.
4823 if (!location.IsRegister()) {
4824 __ movl(temp, Address(ESP, location.GetStackIndex()));
4825 return temp;
4826 }
4827 // For register locations, check if the register was saved. If so, get it from the stack.
4828 // Note: There is a chance that the register was saved but not overwritten, so we could
4829 // save one load. However, since this is just an intrinsic slow path we prefer this
4830 // simple and more robust approach rather than trying to determine if that's the case.
4831 SlowPathCode* slow_path = GetCurrentSlowPath();
4832 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
4833 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4834 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4835 __ movl(temp, Address(ESP, stack_offset));
4836 return temp;
4837 }
4838 return location.AsRegister<Register>();
4839 }
4840
4841 void CodeGeneratorX86::GenerateStaticOrDirectCall(
4842 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4843 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
4844 switch (invoke->GetMethodLoadKind()) {
4845 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4846 // temp = thread->string_init_entrypoint
4847 uint32_t offset =
4848 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4849 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
4850 break;
4851 }
4852 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4853 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4854 break;
4855 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4856 DCHECK(GetCompilerOptions().IsBootImage());
4857 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4858 temp.AsRegister<Register>());
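// Emit a lea with a placeholder displacement (kDummy32BitOffset); the
// linker patch recorded below rewrites that displacement so the computed
// address points at the target method in the boot image.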
4859 __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
4860 RecordBootImageMethodPatch(invoke);
4861 break;
4862 }
4863 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4864 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4865 temp.AsRegister<Register>());
4866 __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4867 RecordBootImageRelRoPatch(
4868 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
4869 GetBootImageOffset(invoke));
4870 break;
4871 }
4872 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4873 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4874 temp.AsRegister<Register>());
4875 __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4876 RecordMethodBssEntryPatch(invoke);
4877 break;
4878 }
4879 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
4880 __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
4881 break;
4882 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4883 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4884 return; // No code pointer retrieval; the runtime performs the call directly.
4885 }
4886 }
4887
4888 switch (invoke->GetCodePtrLocation()) {
4889 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4890 __ call(GetFrameEntryLabel());
4891 break;
4892 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4893 // (callee_method + offset_of_quick_compiled_code)()
4894 __ call(Address(callee_method.AsRegister<Register>(),
4895 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
4896 kX86PointerSize).Int32Value()));
4897 break;
4898 }
4899 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4900
4901 DCHECK(!IsLeafMethod());
4902 }
4903
4904 void CodeGeneratorX86::GenerateVirtualCall(
4905 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4906 Register temp = temp_in.AsRegister<Register>();
4907 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4908 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
4909
4910 // Use the calling convention instead of the location of the receiver, as
4911 // intrinsics may have put the receiver in a different register. In the intrinsics
4912 // slow path, the arguments have been moved to the right place, so here we are
4913 // guaranteed that the receiver is the first register of the calling convention.
4914 InvokeDexCallingConvention calling_convention;
4915 Register receiver = calling_convention.GetRegisterAt(0);
4916 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4917 // /* HeapReference<Class> */ temp = receiver->klass_
4918 __ movl(temp, Address(receiver, class_offset));
4919 MaybeRecordImplicitNullCheck(invoke);
4920 // Instead of simply (possibly) unpoisoning `temp` here, we should
4921 // emit a read barrier for the previous class reference load.
4922 // However this is not required in practice, as this is an
4923 // intermediate/temporary reference and because the current
4924 // concurrent copying collector keeps the from-space memory
4925 // intact/accessible until the end of the marking phase (this may
4926 // not hold for future collectors).
4927 __ MaybeUnpoisonHeapReference(temp);
4928 // temp = temp->GetMethodAt(method_offset);
4929 __ movl(temp, Address(temp, method_offset));
4930 // call temp->GetEntryPoint();
4931 __ call(Address(
4932 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
4933 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4934 }
4935
4936 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
4937 uint32_t intrinsic_data) {
4938 boot_image_intrinsic_patches_.emplace_back(
4939 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
4940 __ Bind(&boot_image_intrinsic_patches_.back().label);
4941 }
4942
4943 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
4944 uint32_t boot_image_offset) {
4945 boot_image_method_patches_.emplace_back(
4946 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
4947 __ Bind(&boot_image_method_patches_.back().label);
4948 }
4949
4950 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
4951 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4952 HX86ComputeBaseMethodAddress* method_address =
4953 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
4954 boot_image_method_patches_.emplace_back(
4955 method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
4956 __ Bind(&boot_image_method_patches_.back().label);
4957 }
4958
4959 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
4960 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4961 HX86ComputeBaseMethodAddress* method_address =
4962 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
4963 // Add the patch entry and bind its label at the end of the instruction.
4964 method_bss_entry_patches_.emplace_back(
4965 method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4966 __ Bind(&method_bss_entry_patches_.back().label);
4967 }
4968
4969 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
4970 HX86ComputeBaseMethodAddress* method_address =
4971 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4972 boot_image_type_patches_.emplace_back(
4973 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
4974 __ Bind(&boot_image_type_patches_.back().label);
4975 }
4976
4977 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
4978 HX86ComputeBaseMethodAddress* method_address =
4979 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4980 type_bss_entry_patches_.emplace_back(
4981 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
4982 return &type_bss_entry_patches_.back().label;
4983 }
4984
4985 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
4986 HX86ComputeBaseMethodAddress* method_address =
4987 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4988 boot_image_string_patches_.emplace_back(
4989 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
4990 __ Bind(&boot_image_string_patches_.back().label);
4991 }
4992
4993 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
4994 HX86ComputeBaseMethodAddress* method_address =
4995 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4996 string_bss_entry_patches_.emplace_back(
4997 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
4998 return &string_bss_entry_patches_.back().label;
4999 }
5000
5001 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5002 uint32_t boot_image_reference,
5003 HInvokeStaticOrDirect* invoke) {
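// Three cases: when compiling the boot image itself the address is formed
// with a PC-relative patch resolved at link time; for PIC AOT code it is
// loaded from the boot image RelRo entry recorded below; otherwise (JIT)
// the boot image is already mapped and the absolute address can be embedded.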
5004 if (GetCompilerOptions().IsBootImage()) {
5005 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5006 HX86ComputeBaseMethodAddress* method_address =
5007 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5008 DCHECK(method_address != nullptr);
5009 Register method_address_reg =
5010 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5011 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5012 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5013 } else if (GetCompilerOptions().GetCompilePic()) {
5014 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5015 HX86ComputeBaseMethodAddress* method_address =
5016 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5017 DCHECK(method_address != nullptr);
5018 Register method_address_reg =
5019 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5020 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5021 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5022 } else {
5023 DCHECK(Runtime::Current()->UseJitCompilation());
5024 gc::Heap* heap = Runtime::Current()->GetHeap();
5025 DCHECK(!heap->GetBootImageSpaces().empty());
5026 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5027 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5028 }
5029 }
5030
5031 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
5032 uint32_t boot_image_offset) {
5033 DCHECK(invoke->IsStatic());
5034 InvokeRuntimeCallingConvention calling_convention;
5035 Register argument = calling_convention.GetRegisterAt(0);
5036 if (GetCompilerOptions().IsBootImage()) {
5037 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
5038 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5039 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5040 HX86ComputeBaseMethodAddress* method_address =
5041 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5042 DCHECK(method_address != nullptr);
5043 Register method_address_reg =
5044 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5045 __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5046 MethodReference target_method = invoke->GetTargetMethod();
5047 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5048 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5049 __ Bind(&boot_image_type_patches_.back().label);
5050 } else {
5051 LoadBootImageAddress(argument, boot_image_offset, invoke);
5052 }
5053 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
5054 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5055 }
5056
5057 // The label points to the end of the "movl" or another instruction but the literal offset
5058 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
5059 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5060
5061 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5062 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5063 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5064 ArenaVector<linker::LinkerPatch>* linker_patches) {
5065 for (const X86PcRelativePatchInfo& info : infos) {
5066 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5067 linker_patches->push_back(Factory(literal_offset,
5068 info.target_dex_file,
5069 GetMethodAddressOffset(info.method_address),
5070 info.offset_or_index));
5071 }
5072 }
5073
5074 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5075 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5076 const DexFile* target_dex_file,
5077 uint32_t pc_insn_offset,
5078 uint32_t boot_image_offset) {
5079 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5080 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5081 }
5082
5083 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5084 DCHECK(linker_patches->empty());
5085 size_t size =
5086 boot_image_method_patches_.size() +
5087 method_bss_entry_patches_.size() +
5088 boot_image_type_patches_.size() +
5089 type_bss_entry_patches_.size() +
5090 boot_image_string_patches_.size() +
5091 string_bss_entry_patches_.size() +
5092 boot_image_intrinsic_patches_.size();
5093 linker_patches->reserve(size);
5094 if (GetCompilerOptions().IsBootImage()) {
5095 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5096 boot_image_method_patches_, linker_patches);
5097 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5098 boot_image_type_patches_, linker_patches);
5099 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5100 boot_image_string_patches_, linker_patches);
5101 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5102 boot_image_intrinsic_patches_, linker_patches);
5103 } else {
5104 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5105 boot_image_method_patches_, linker_patches);
5106 DCHECK(boot_image_type_patches_.empty());
5107 DCHECK(boot_image_string_patches_.empty());
5108 DCHECK(boot_image_intrinsic_patches_.empty());
5109 }
5110 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5111 method_bss_entry_patches_, linker_patches);
5112 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5113 type_bss_entry_patches_, linker_patches);
5114 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5115 string_bss_entry_patches_, linker_patches);
5116 DCHECK_EQ(size, linker_patches->size());
5117 }
5118
5119 void CodeGeneratorX86::MarkGCCard(Register temp,
5120 Register card,
5121 Register object,
5122 Register value,
5123 bool value_can_be_null) {
5124 NearLabel is_null;
5125 if (value_can_be_null) {
5126 __ testl(value, value);
5127 __ j(kEqual, &is_null);
5128 }
5129 // Load the address of the card table into `card`.
5130 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5131 // Calculate the offset (in the card table) of the card corresponding to
5132 // `object`.
5133 __ movl(temp, object);
5134 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5135 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5136 // `object`'s card.
5137 //
5138 // Register `card` contains the address of the card table. Note that the card
5139 // table's base is biased during its creation so that it always starts at an
5140 // address whose least-significant byte is equal to `kCardDirty` (see
5141 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5142 // below writes the `kCardDirty` (byte) value into the `object`'s card
5143 // (located at `card + object >> kCardShift`).
5144 //
5145 // This dual use of the value in register `card` (1. to calculate the location
5146 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5147 // (no need to explicitly load `kCardDirty` as an immediate value).
5148 __ movb(Address(temp, card, TIMES_1, 0),
5149 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5150 if (value_can_be_null) {
5151 __ Bind(&is_null);
5152 }
5153 }
5154
5155 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5156 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5157
5158 bool object_field_get_with_read_barrier =
5159 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5160 LocationSummary* locations =
5161 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5162 kEmitCompilerReadBarrier
5163 ? LocationSummary::kCallOnSlowPath
5164 : LocationSummary::kNoCall);
5165 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5166 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5167 }
5168 locations->SetInAt(0, Location::RequiresRegister());
5169
5170 if (DataType::IsFloatingPointType(instruction->GetType())) {
5171 locations->SetOut(Location::RequiresFpuRegister());
5172 } else {
5173 // The output overlaps in case of long: we don't want the low move
5174 // to overwrite the object's location. Likewise, in the case of
5175 // an object field get with read barriers enabled, we do not want
5176 // the move to overwrite the object's location, as we need it to emit
5177 // the read barrier.
5178 locations->SetOut(
5179 Location::RequiresRegister(),
5180 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
5181 Location::kOutputOverlap :
5182 Location::kNoOutputOverlap);
5183 }
5184
5185 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5186 // Long values can be loaded atomically into an XMM using movsd.
5187 // So we use an XMM register as a temp to achieve atomicity (first
5188 // load the temp into the XMM and then copy the XMM into the
5189 // output, 32 bits at a time).
5190 locations->AddTemp(Location::RequiresFpuRegister());
5191 }
5192 }
5193
5194 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5195 const FieldInfo& field_info) {
5196 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5197
5198 LocationSummary* locations = instruction->GetLocations();
5199 Location base_loc = locations->InAt(0);
5200 Register base = base_loc.AsRegister<Register>();
5201 Location out = locations->Out();
5202 bool is_volatile = field_info.IsVolatile();
5203 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5204 DataType::Type load_type = instruction->GetType();
5205 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5206
5207 switch (load_type) {
5208 case DataType::Type::kBool:
5209 case DataType::Type::kUint8: {
5210 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5211 break;
5212 }
5213
5214 case DataType::Type::kInt8: {
5215 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5216 break;
5217 }
5218
5219 case DataType::Type::kUint16: {
5220 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5221 break;
5222 }
5223
5224 case DataType::Type::kInt16: {
5225 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5226 break;
5227 }
5228
5229 case DataType::Type::kInt32:
5230 __ movl(out.AsRegister<Register>(), Address(base, offset));
5231 break;
5232
5233 case DataType::Type::kReference: {
5234 // /* HeapReference<Object> */ out = *(base + offset)
5235 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5236 // Note that a potential implicit null check is handled in this
5237 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5238 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5239 instruction, out, base, offset, /* needs_null_check= */ true);
5240 if (is_volatile) {
5241 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5242 }
5243 } else {
5244 __ movl(out.AsRegister<Register>(), Address(base, offset));
5245 codegen_->MaybeRecordImplicitNullCheck(instruction);
5246 if (is_volatile) {
5247 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5248 }
5249 // If read barriers are enabled, emit read barriers other than
5250 // Baker's using a slow path (and also unpoison the loaded
5251 // reference, if heap poisoning is enabled).
5252 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5253 }
5254 break;
5255 }
5256
5257 case DataType::Type::kInt64: {
5258 if (is_volatile) {
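// Load the whole 64-bit field atomically into an XMM register, then split
// it into the output register pair: movd extracts the low 32 bits and,
// after a 32-bit logical right shift of the XMM, the high 32 bits.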
5259 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5260 __ movsd(temp, Address(base, offset));
5261 codegen_->MaybeRecordImplicitNullCheck(instruction);
5262 __ movd(out.AsRegisterPairLow<Register>(), temp);
5263 __ psrlq(temp, Immediate(32));
5264 __ movd(out.AsRegisterPairHigh<Register>(), temp);
5265 } else {
5266 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5267 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5268 codegen_->MaybeRecordImplicitNullCheck(instruction);
5269 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5270 }
5271 break;
5272 }
5273
5274 case DataType::Type::kFloat32: {
5275 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5276 break;
5277 }
5278
5279 case DataType::Type::kFloat64: {
5280 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5281 break;
5282 }
5283
5284 case DataType::Type::kUint32:
5285 case DataType::Type::kUint64:
5286 case DataType::Type::kVoid:
5287 LOG(FATAL) << "Unreachable type " << load_type;
5288 UNREACHABLE();
5289 }
5290
5291 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5292 // Potential implicit null checks, in the case of reference or
5293 // long fields, are handled in the previous switch statement.
5294 } else {
5295 codegen_->MaybeRecordImplicitNullCheck(instruction);
5296 }
5297
5298 if (is_volatile) {
5299 if (load_type == DataType::Type::kReference) {
5300 // Memory barriers, in the case of references, are also handled
5301 // in the previous switch statement.
5302 } else {
5303 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5304 }
5305 }
5306 }
5307
5308 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5309 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5310
5311 LocationSummary* locations =
5312 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5313 locations->SetInAt(0, Location::RequiresRegister());
5314 bool is_volatile = field_info.IsVolatile();
5315 DataType::Type field_type = field_info.GetFieldType();
5316 bool is_byte_type = DataType::Size(field_type) == 1u;
5317
5318 // The register allocator does not support multiple
5319 // inputs that die at entry with one in a specific register.
5320 if (is_byte_type) {
5321 // Ensure the value is in a byte register.
5322 locations->SetInAt(1, Location::RegisterLocation(EAX));
5323 } else if (DataType::IsFloatingPointType(field_type)) {
5324 if (is_volatile && field_type == DataType::Type::kFloat64) {
5325 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5326 locations->SetInAt(1, Location::RequiresFpuRegister());
5327 } else {
5328 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5329 }
5330 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5331 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5332 locations->SetInAt(1, Location::RequiresRegister());
5333
5334 // A 64-bit value can be written atomically to an address with movsd and an XMM register.
5335 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5336 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5337 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5338 // isolated cases when we need this it isn't worth adding the extra complexity.
5339 locations->AddTemp(Location::RequiresFpuRegister());
5340 locations->AddTemp(Location::RequiresFpuRegister());
5341 } else {
5342 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5343
5344 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5345 // Temporary registers for the write barrier.
5346 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
5347 // Ensure the card is in a byte register.
5348 locations->AddTemp(Location::RegisterLocation(ECX));
5349 }
5350 }
5351 }
5352
5353 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5354 const FieldInfo& field_info,
5355 bool value_can_be_null) {
5356 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5357
5358 LocationSummary* locations = instruction->GetLocations();
5359 Register base = locations->InAt(0).AsRegister<Register>();
5360 Location value = locations->InAt(1);
5361 bool is_volatile = field_info.IsVolatile();
5362 DataType::Type field_type = field_info.GetFieldType();
5363 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5364 bool needs_write_barrier =
5365 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5366
5367 if (is_volatile) {
5368 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5369 }
5370
5371 bool maybe_record_implicit_null_check_done = false;
5372
5373 switch (field_type) {
5374 case DataType::Type::kBool:
5375 case DataType::Type::kUint8:
5376 case DataType::Type::kInt8: {
5377 __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
5378 break;
5379 }
5380
5381 case DataType::Type::kUint16:
5382 case DataType::Type::kInt16: {
5383 if (value.IsConstant()) {
5384 __ movw(Address(base, offset),
5385 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5386 } else {
5387 __ movw(Address(base, offset), value.AsRegister<Register>());
5388 }
5389 break;
5390 }
5391
5392 case DataType::Type::kInt32:
5393 case DataType::Type::kReference: {
5394 if (kPoisonHeapReferences && needs_write_barrier) {
5395 // Note that in the case where `value` is a null reference,
5396 // we do not enter this block, as the reference does not
5397 // need poisoning.
5398 DCHECK_EQ(field_type, DataType::Type::kReference);
5399 Register temp = locations->GetTemp(0).AsRegister<Register>();
5400 __ movl(temp, value.AsRegister<Register>());
5401 __ PoisonHeapReference(temp);
5402 __ movl(Address(base, offset), temp);
5403 } else if (value.IsConstant()) {
5404 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5405 __ movl(Address(base, offset), Immediate(v));
5406 } else {
5407 DCHECK(value.IsRegister()) << value;
5408 __ movl(Address(base, offset), value.AsRegister<Register>());
5409 }
5410 break;
5411 }
5412
5413 case DataType::Type::kInt64: {
5414 if (is_volatile) {
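// Assemble the 64-bit value in an XMM register (punpckldq interleaves the
// two 32-bit halves) so the store can be performed atomically with a
// single movsd.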
5415 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5416 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5417 __ movd(temp1, value.AsRegisterPairLow<Register>());
5418 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5419 __ punpckldq(temp1, temp2);
5420 __ movsd(Address(base, offset), temp1);
5421 codegen_->MaybeRecordImplicitNullCheck(instruction);
5422 } else if (value.IsConstant()) {
5423 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5424 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5425 codegen_->MaybeRecordImplicitNullCheck(instruction);
5426 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5427 } else {
5428 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5429 codegen_->MaybeRecordImplicitNullCheck(instruction);
5430 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5431 }
5432 maybe_record_implicit_null_check_done = true;
5433 break;
5434 }
5435
5436 case DataType::Type::kFloat32: {
5437 if (value.IsConstant()) {
5438 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5439 __ movl(Address(base, offset), Immediate(v));
5440 } else {
5441 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5442 }
5443 break;
5444 }
5445
5446 case DataType::Type::kFloat64: {
5447 if (value.IsConstant()) {
5448 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5449 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5450 codegen_->MaybeRecordImplicitNullCheck(instruction);
5451 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5452 maybe_record_implicit_null_check_done = true;
5453 } else {
5454 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5455 }
5456 break;
5457 }
5458
5459 case DataType::Type::kUint32:
5460 case DataType::Type::kUint64:
5461 case DataType::Type::kVoid:
5462 LOG(FATAL) << "Unreachable type " << field_type;
5463 UNREACHABLE();
5464 }
5465
5466 if (!maybe_record_implicit_null_check_done) {
5467 codegen_->MaybeRecordImplicitNullCheck(instruction);
5468 }
5469
5470 if (needs_write_barrier) {
5471 Register temp = locations->GetTemp(0).AsRegister<Register>();
5472 Register card = locations->GetTemp(1).AsRegister<Register>();
5473 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5474 }
5475
5476 if (is_volatile) {
5477 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5478 }
5479 }
5480
5481 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5482 HandleFieldGet(instruction, instruction->GetFieldInfo());
5483 }
5484
5485 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5486 HandleFieldGet(instruction, instruction->GetFieldInfo());
5487 }
5488
5489 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5490 HandleFieldSet(instruction, instruction->GetFieldInfo());
5491 }
5492
5493 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5494 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5495 }
5496
5497 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5498 HandleFieldSet(instruction, instruction->GetFieldInfo());
5499 }
5500
5501 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5502 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5503 }
5504
5505 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5506 HandleFieldGet(instruction, instruction->GetFieldInfo());
5507 }
5508
5509 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5510 HandleFieldGet(instruction, instruction->GetFieldInfo());
5511 }
5512
5513 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5514 HUnresolvedInstanceFieldGet* instruction) {
5515 FieldAccessCallingConventionX86 calling_convention;
5516 codegen_->CreateUnresolvedFieldLocationSummary(
5517 instruction, instruction->GetFieldType(), calling_convention);
5518 }
5519
5520 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5521 HUnresolvedInstanceFieldGet* instruction) {
5522 FieldAccessCallingConventionX86 calling_convention;
5523 codegen_->GenerateUnresolvedFieldAccess(instruction,
5524 instruction->GetFieldType(),
5525 instruction->GetFieldIndex(),
5526 instruction->GetDexPc(),
5527 calling_convention);
5528 }
5529
5530 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5531 HUnresolvedInstanceFieldSet* instruction) {
5532 FieldAccessCallingConventionX86 calling_convention;
5533 codegen_->CreateUnresolvedFieldLocationSummary(
5534 instruction, instruction->GetFieldType(), calling_convention);
5535 }
5536
5537 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5538 HUnresolvedInstanceFieldSet* instruction) {
5539 FieldAccessCallingConventionX86 calling_convention;
5540 codegen_->GenerateUnresolvedFieldAccess(instruction,
5541 instruction->GetFieldType(),
5542 instruction->GetFieldIndex(),
5543 instruction->GetDexPc(),
5544 calling_convention);
5545 }
5546
5547 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5548 HUnresolvedStaticFieldGet* instruction) {
5549 FieldAccessCallingConventionX86 calling_convention;
5550 codegen_->CreateUnresolvedFieldLocationSummary(
5551 instruction, instruction->GetFieldType(), calling_convention);
5552 }
5553
5554 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5555 HUnresolvedStaticFieldGet* instruction) {
5556 FieldAccessCallingConventionX86 calling_convention;
5557 codegen_->GenerateUnresolvedFieldAccess(instruction,
5558 instruction->GetFieldType(),
5559 instruction->GetFieldIndex(),
5560 instruction->GetDexPc(),
5561 calling_convention);
5562 }
5563
5564 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5565 HUnresolvedStaticFieldSet* instruction) {
5566 FieldAccessCallingConventionX86 calling_convention;
5567 codegen_->CreateUnresolvedFieldLocationSummary(
5568 instruction, instruction->GetFieldType(), calling_convention);
5569 }
5570
5571 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5572 HUnresolvedStaticFieldSet* instruction) {
5573 FieldAccessCallingConventionX86 calling_convention;
5574 codegen_->GenerateUnresolvedFieldAccess(instruction,
5575 instruction->GetFieldType(),
5576 instruction->GetFieldIndex(),
5577 instruction->GetDexPc(),
5578 calling_convention);
5579 }
5580
5581 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5582 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5583 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5584 ? Location::RequiresRegister()
5585 : Location::Any();
5586 locations->SetInAt(0, loc);
5587 }
5588
5589 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5590 if (CanMoveNullCheckToUser(instruction)) {
5591 return;
5592 }
5593 LocationSummary* locations = instruction->GetLocations();
5594 Location obj = locations->InAt(0);
5595
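  // Implicit null check: probe the object by reading from offset 0. A null `obj`
  // faults, and the recorded pc info lets the runtime's fault handler turn the
  // resulting access violation into a NullPointerException at this dex pc.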
5596 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5597 RecordPcInfo(instruction, instruction->GetDexPc());
5598 }
5599
5600 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5601 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5602 AddSlowPath(slow_path);
5603
5604 LocationSummary* locations = instruction->GetLocations();
5605 Location obj = locations->InAt(0);
5606
5607 if (obj.IsRegister()) {
5608 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5609 } else if (obj.IsStackSlot()) {
5610 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5611 } else {
5612 DCHECK(obj.IsConstant()) << obj;
5613 DCHECK(obj.GetConstant()->IsNullConstant());
5614 __ jmp(slow_path->GetEntryLabel());
5615 return;
5616 }
5617 __ j(kEqual, slow_path->GetEntryLabel());
5618 }
5619
5620 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5621 codegen_->GenerateNullCheck(instruction);
5622 }
5623
5624 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5625 bool object_array_get_with_read_barrier =
5626 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5627 LocationSummary* locations =
5628 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5629 object_array_get_with_read_barrier
5630 ? LocationSummary::kCallOnSlowPath
5631 : LocationSummary::kNoCall);
5632 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5633 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5634 }
5635 locations->SetInAt(0, Location::RequiresRegister());
5636 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5637 if (DataType::IsFloatingPointType(instruction->GetType())) {
5638 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5639 } else {
5640 // The output overlaps in case of long: we don't want the low move
5641 // to overwrite the array's location. Likewise, in the case of an
5642 // object array get with read barriers enabled, we do not want the
5643 // move to overwrite the array's location, as we need it to emit
5644 // the read barrier.
5645 locations->SetOut(
5646 Location::RequiresRegister(),
5647 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5648 ? Location::kOutputOverlap
5649 : Location::kNoOutputOverlap);
5650 }
5651 }
5652
5653 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5654 LocationSummary* locations = instruction->GetLocations();
5655 Location obj_loc = locations->InAt(0);
5656 Register obj = obj_loc.AsRegister<Register>();
5657 Location index = locations->InAt(1);
5658 Location out_loc = locations->Out();
5659 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5660
5661 DataType::Type type = instruction->GetType();
5662 switch (type) {
5663 case DataType::Type::kBool:
5664 case DataType::Type::kUint8: {
5665 Register out = out_loc.AsRegister<Register>();
5666 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5667 break;
5668 }
5669
5670 case DataType::Type::kInt8: {
5671 Register out = out_loc.AsRegister<Register>();
5672 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5673 break;
5674 }
5675
5676 case DataType::Type::kUint16: {
5677 Register out = out_loc.AsRegister<Register>();
5678 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5679        // Branch into the compressed and uncompressed cases based on the
5680        // compression flag stored in the count field.
5680 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5681 NearLabel done, not_compressed;
5682 __ testb(Address(obj, count_offset), Immediate(1));
5683 codegen_->MaybeRecordImplicitNullCheck(instruction);
5684 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5685 "Expecting 0=compressed, 1=uncompressed");
5686        __ j(kNotZero, &not_compressed);
5687 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5688 __ jmp(&done);
5689        __ Bind(&not_compressed);
5690 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5691 __ Bind(&done);
5692 } else {
5693 // Common case for charAt of array of char or when string compression's
5694 // feature is turned off.
5695 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5696 }
5697 break;
5698 }
5699
5700 case DataType::Type::kInt16: {
5701 Register out = out_loc.AsRegister<Register>();
5702 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5703 break;
5704 }
5705
5706 case DataType::Type::kInt32: {
5707 Register out = out_loc.AsRegister<Register>();
5708 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5709 break;
5710 }
5711
5712 case DataType::Type::kReference: {
5713 static_assert(
5714 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5715 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5716 // /* HeapReference<Object> */ out =
5717 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5718 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5719 // Note that a potential implicit null check is handled in this
5720 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5721 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5722 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5723 } else {
5724 Register out = out_loc.AsRegister<Register>();
5725 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5726 codegen_->MaybeRecordImplicitNullCheck(instruction);
5727 // If read barriers are enabled, emit read barriers other than
5728 // Baker's using a slow path (and also unpoison the loaded
5729 // reference, if heap poisoning is enabled).
5730 if (index.IsConstant()) {
5731 uint32_t offset =
5732 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5733 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5734 } else {
5735 codegen_->MaybeGenerateReadBarrierSlow(
5736 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5737 }
5738 }
5739 break;
5740 }
5741
5742 case DataType::Type::kInt64: {
5743 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5744 __ movl(out_loc.AsRegisterPairLow<Register>(),
5745 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5746 codegen_->MaybeRecordImplicitNullCheck(instruction);
5747 __ movl(out_loc.AsRegisterPairHigh<Register>(),
5748 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5749 break;
5750 }
5751
5752 case DataType::Type::kFloat32: {
5753 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5754 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5755 break;
5756 }
5757
5758 case DataType::Type::kFloat64: {
5759 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5760 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5761 break;
5762 }
5763
5764 case DataType::Type::kUint32:
5765 case DataType::Type::kUint64:
5766 case DataType::Type::kVoid:
5767 LOG(FATAL) << "Unreachable type " << type;
5768 UNREACHABLE();
5769 }
5770
5771 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
5772 // Potential implicit null checks, in the case of reference or
5773 // long arrays, are handled in the previous switch statement.
5774 } else {
5775 codegen_->MaybeRecordImplicitNullCheck(instruction);
5776 }
5777 }
5778
5779 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
5780 DataType::Type value_type = instruction->GetComponentType();
5781
5782 bool needs_write_barrier =
5783 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5784 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5785
5786 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5787 instruction,
5788 may_need_runtime_call_for_type_check ?
5789 LocationSummary::kCallOnSlowPath :
5790 LocationSummary::kNoCall);
5791
5792 bool is_byte_type = DataType::Size(value_type) == 1u;
5793   // We need the inputs to be different from the output in case of a long operation.
5794 // In case of a byte operation, the register allocator does not support multiple
5795 // inputs that die at entry with one in a specific register.
5796 locations->SetInAt(0, Location::RequiresRegister());
5797 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5798 if (is_byte_type) {
5799 // Ensure the value is in a byte register.
5800 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
5801 } else if (DataType::IsFloatingPointType(value_type)) {
5802 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5803 } else {
5804 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5805 }
5806 if (needs_write_barrier) {
5807 // Temporary registers for the write barrier.
5808 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5809 // Ensure the card is in a byte register.
5810 locations->AddTemp(Location::RegisterLocation(ECX));
5811 }
5812 }
5813
5814 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
5815 LocationSummary* locations = instruction->GetLocations();
5816 Location array_loc = locations->InAt(0);
5817 Register array = array_loc.AsRegister<Register>();
5818 Location index = locations->InAt(1);
5819 Location value = locations->InAt(2);
5820 DataType::Type value_type = instruction->GetComponentType();
5821 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5822 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5823 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5824 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5825 bool needs_write_barrier =
5826 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5827
5828 switch (value_type) {
5829 case DataType::Type::kBool:
5830 case DataType::Type::kUint8:
5831 case DataType::Type::kInt8: {
5832 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5833 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
5834 if (value.IsRegister()) {
5835 __ movb(address, value.AsRegister<ByteRegister>());
5836 } else {
5837 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5838 }
5839 codegen_->MaybeRecordImplicitNullCheck(instruction);
5840 break;
5841 }
5842
5843 case DataType::Type::kUint16:
5844 case DataType::Type::kInt16: {
5845 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5846 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
5847 if (value.IsRegister()) {
5848 __ movw(address, value.AsRegister<Register>());
5849 } else {
5850 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5851 }
5852 codegen_->MaybeRecordImplicitNullCheck(instruction);
5853 break;
5854 }
5855
5856 case DataType::Type::kReference: {
5857 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5858 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5859
5860 if (!value.IsRegister()) {
5861 // Just setting null.
5862 DCHECK(instruction->InputAt(2)->IsNullConstant());
5863 DCHECK(value.IsConstant()) << value;
5864 __ movl(address, Immediate(0));
5865 codegen_->MaybeRecordImplicitNullCheck(instruction);
5866 DCHECK(!needs_write_barrier);
5867 DCHECK(!may_need_runtime_call_for_type_check);
5868 break;
5869 }
5870
5871 DCHECK(needs_write_barrier);
5872 Register register_value = value.AsRegister<Register>();
5873 // We cannot use a NearLabel for `done`, as its range may be too
5874 // short when Baker read barriers are enabled.
5875 Label done;
5876 NearLabel not_null, do_put;
5877 SlowPathCode* slow_path = nullptr;
5878 Location temp_loc = locations->GetTemp(0);
5879 Register temp = temp_loc.AsRegister<Register>();
5880 if (may_need_runtime_call_for_type_check) {
5881 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
5882 codegen_->AddSlowPath(slow_path);
5883 if (instruction->GetValueCanBeNull()) {
5884 __ testl(register_value, register_value);
5885          __ j(kNotEqual, &not_null);
5886 __ movl(address, Immediate(0));
5887 codegen_->MaybeRecordImplicitNullCheck(instruction);
5888 __ jmp(&done);
5889          __ Bind(&not_null);
5890 }
5891
5892 // Note that when Baker read barriers are enabled, the type
5893 // checks are performed without read barriers. This is fine,
5894 // even in the case where a class object is in the from-space
5895 // after the flip, as a comparison involving such a type would
5896 // not produce a false positive; it may of course produce a
5897 // false negative, in which case we would take the ArraySet
5898 // slow path.
5899
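        // Fast-path type check: accept the store if the value's class equals the
        // array's component type, or (for a statically typed Object[] array) if the
        // component type's super class is null, i.e. the component type is
        // java.lang.Object. Anything else falls back to the ArraySet slow path.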
5900 // /* HeapReference<Class> */ temp = array->klass_
5901 __ movl(temp, Address(array, class_offset));
5902 codegen_->MaybeRecordImplicitNullCheck(instruction);
5903 __ MaybeUnpoisonHeapReference(temp);
5904
5905 // /* HeapReference<Class> */ temp = temp->component_type_
5906 __ movl(temp, Address(temp, component_offset));
5907 // If heap poisoning is enabled, no need to unpoison `temp`
5908 // nor the object reference in `register_value->klass`, as
5909 // we are comparing two poisoned references.
5910 __ cmpl(temp, Address(register_value, class_offset));
5911
5912 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5913 __ j(kEqual, &do_put);
5914 // If heap poisoning is enabled, the `temp` reference has
5915 // not been unpoisoned yet; unpoison it now.
5916 __ MaybeUnpoisonHeapReference(temp);
5917
5918 // If heap poisoning is enabled, no need to unpoison the
5919 // heap reference loaded below, as it is only used for a
5920 // comparison with null.
5921 __ cmpl(Address(temp, super_offset), Immediate(0));
5922 __ j(kNotEqual, slow_path->GetEntryLabel());
5923 __ Bind(&do_put);
5924 } else {
5925 __ j(kNotEqual, slow_path->GetEntryLabel());
5926 }
5927 }
5928
5929 if (kPoisonHeapReferences) {
5930 __ movl(temp, register_value);
5931 __ PoisonHeapReference(temp);
5932 __ movl(address, temp);
5933 } else {
5934 __ movl(address, register_value);
5935 }
5936 if (!may_need_runtime_call_for_type_check) {
5937 codegen_->MaybeRecordImplicitNullCheck(instruction);
5938 }
5939
5940 Register card = locations->GetTemp(1).AsRegister<Register>();
5941 codegen_->MarkGCCard(
5942 temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
5943 __ Bind(&done);
5944
5945 if (slow_path != nullptr) {
5946 __ Bind(slow_path->GetExitLabel());
5947 }
5948
5949 break;
5950 }
5951
5952 case DataType::Type::kInt32: {
5953 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5954 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5955 if (value.IsRegister()) {
5956 __ movl(address, value.AsRegister<Register>());
5957 } else {
5958 DCHECK(value.IsConstant()) << value;
5959 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5960 __ movl(address, Immediate(v));
5961 }
5962 codegen_->MaybeRecordImplicitNullCheck(instruction);
5963 break;
5964 }
5965
5966 case DataType::Type::kInt64: {
5967 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5968 if (value.IsRegisterPair()) {
5969 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5970 value.AsRegisterPairLow<Register>());
5971 codegen_->MaybeRecordImplicitNullCheck(instruction);
5972 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5973 value.AsRegisterPairHigh<Register>());
5974 } else {
5975 DCHECK(value.IsConstant());
5976 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
5977 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5978 Immediate(Low32Bits(val)));
5979 codegen_->MaybeRecordImplicitNullCheck(instruction);
5980 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5981 Immediate(High32Bits(val)));
5982 }
5983 break;
5984 }
5985
5986 case DataType::Type::kFloat32: {
5987 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5988 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5989 if (value.IsFpuRegister()) {
5990 __ movss(address, value.AsFpuRegister<XmmRegister>());
5991 } else {
5992 DCHECK(value.IsConstant());
5993 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5994 __ movl(address, Immediate(v));
5995 }
5996 codegen_->MaybeRecordImplicitNullCheck(instruction);
5997 break;
5998 }
5999
6000 case DataType::Type::kFloat64: {
6001 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6002 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6003 if (value.IsFpuRegister()) {
6004 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6005 } else {
6006 DCHECK(value.IsConstant());
6007 Address address_hi =
6008 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6009 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6010 __ movl(address, Immediate(Low32Bits(v)));
6011 codegen_->MaybeRecordImplicitNullCheck(instruction);
6012 __ movl(address_hi, Immediate(High32Bits(v)));
6013 }
6014 break;
6015 }
6016
6017 case DataType::Type::kUint32:
6018 case DataType::Type::kUint64:
6019 case DataType::Type::kVoid:
6020 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6021 UNREACHABLE();
6022 }
6023 }
6024
6025 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6026 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6027 locations->SetInAt(0, Location::RequiresRegister());
6028 if (!instruction->IsEmittedAtUseSite()) {
6029 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6030 }
6031 }
6032
6033 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6034 if (instruction->IsEmittedAtUseSite()) {
6035 return;
6036 }
6037
6038 LocationSummary* locations = instruction->GetLocations();
6039 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6040 Register obj = locations->InAt(0).AsRegister<Register>();
6041 Register out = locations->Out().AsRegister<Register>();
6042 __ movl(out, Address(obj, offset));
6043 codegen_->MaybeRecordImplicitNullCheck(instruction);
6044   // Shift out the compression flag (the least significant bit of the count field)
6044   // in case the array is a String's char array.
6045 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6046 __ shrl(out, Immediate(1));
6047 }
6048 }
6049
6050 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6051 RegisterSet caller_saves = RegisterSet::Empty();
6052 InvokeRuntimeCallingConvention calling_convention;
6053 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6054 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6055 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6056 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6057 HInstruction* length = instruction->InputAt(1);
6058 if (!length->IsEmittedAtUseSite()) {
6059 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6060 }
6061   // Need a register to read the array's length.
6062 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6063 locations->AddTemp(Location::RequiresRegister());
6064 }
6065 }
6066
6067 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6068 const bool is_string_compressed_char_at =
6069 mirror::kUseStringCompression && instruction->IsStringCharAt();
6070 LocationSummary* locations = instruction->GetLocations();
6071 Location index_loc = locations->InAt(0);
6072 Location length_loc = locations->InAt(1);
6073 SlowPathCode* slow_path =
6074 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6075
6076 if (length_loc.IsConstant()) {
6077 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6078 if (index_loc.IsConstant()) {
6079       // BCE will remove the bounds check if we are guaranteed to pass.
6080 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6081 if (index < 0 || index >= length) {
6082 codegen_->AddSlowPath(slow_path);
6083 __ jmp(slow_path->GetEntryLabel());
6084 } else {
6085 // Some optimization after BCE may have generated this, and we should not
6086 // generate a bounds check if it is a valid range.
6087 }
6088 return;
6089 }
6090
6091 // We have to reverse the jump condition because the length is the constant.
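    // The unsigned comparison also rejects negative indices, which wrap around
    // to large unsigned values and therefore take the slow path as well.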
6092 Register index_reg = index_loc.AsRegister<Register>();
6093 __ cmpl(index_reg, Immediate(length));
6094 codegen_->AddSlowPath(slow_path);
6095 __ j(kAboveEqual, slow_path->GetEntryLabel());
6096 } else {
6097 HInstruction* array_length = instruction->InputAt(1);
6098 if (array_length->IsEmittedAtUseSite()) {
6099 // Address the length field in the array.
6100 DCHECK(array_length->IsArrayLength());
6101 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6102 Location array_loc = array_length->GetLocations()->InAt(0);
6103 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6104 if (is_string_compressed_char_at) {
6105 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6106 // the string compression flag) with the in-memory length and avoid the temporary.
6107 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6108 __ movl(length_reg, array_len);
6109 codegen_->MaybeRecordImplicitNullCheck(array_length);
6110 __ shrl(length_reg, Immediate(1));
6111 codegen_->GenerateIntCompare(length_reg, index_loc);
6112 } else {
6113         // Checking bounds for the general case:
6114         // an array of char, or a String's char array with compression disabled.
6115 if (index_loc.IsConstant()) {
6116 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6117 __ cmpl(array_len, Immediate(value));
6118 } else {
6119 __ cmpl(array_len, index_loc.AsRegister<Register>());
6120 }
6121 codegen_->MaybeRecordImplicitNullCheck(array_length);
6122 }
6123 } else {
6124 codegen_->GenerateIntCompare(length_loc, index_loc);
6125 }
6126 codegen_->AddSlowPath(slow_path);
6127 __ j(kBelowEqual, slow_path->GetEntryLabel());
6128 }
6129 }
6130
6131 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6132 LOG(FATAL) << "Unreachable";
6133 }
6134
6135 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6136 if (instruction->GetNext()->IsSuspendCheck() &&
6137 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6138 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6139 // The back edge will generate the suspend check.
6140 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6141 }
6142
6143 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6144 }
6145
6146 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6147 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6148 instruction, LocationSummary::kCallOnSlowPath);
6149 // In suspend check slow path, usually there are no caller-save registers at all.
6150 // If SIMD instructions are present, however, we force spilling all live SIMD
6151 // registers in full width (since the runtime only saves/restores lower part).
6152 locations->SetCustomSlowPathCallerSaves(
6153 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6154 }
6155
6156 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6157 HBasicBlock* block = instruction->GetBlock();
6158 if (block->GetLoopInformation() != nullptr) {
6159 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6160 // The back edge will generate the suspend check.
6161 return;
6162 }
6163 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6164 // The goto will generate the suspend check.
6165 return;
6166 }
6167 GenerateSuspendCheck(instruction, nullptr);
6168 }
6169
6170 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6171 HBasicBlock* successor) {
6172 SuspendCheckSlowPathX86* slow_path =
6173 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6174 if (slow_path == nullptr) {
6175 slow_path =
6176 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6177 instruction->SetSlowPath(slow_path);
6178 codegen_->AddSlowPath(slow_path);
6179 if (successor != nullptr) {
6180 DCHECK(successor->IsLoopHeader());
6181 }
6182 } else {
6183 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6184 }
6185
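  // The thread flags live at a fixed offset from the Thread object, addressed
  // through the fs segment on x86. Any non-zero flag (e.g. a pending suspend or
  // checkpoint request) diverts execution to the slow path.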
6186 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6187 Immediate(0));
6188 if (successor == nullptr) {
6189 __ j(kNotEqual, slow_path->GetEntryLabel());
6190 __ Bind(slow_path->GetReturnLabel());
6191 } else {
6192 __ j(kEqual, codegen_->GetLabelOf(successor));
6193 __ jmp(slow_path->GetEntryLabel());
6194 }
6195 }
6196
6197 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6198 return codegen_->GetAssembler();
6199 }
6200
6201 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6202 ScratchRegisterScope ensure_scratch(
6203 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6204 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
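  // If the scratch register had to be spilled, ESP has moved down by one word,
  // so all ESP-relative offsets below are compensated via `stack_offset`.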
6205 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6206
6207 // Now that temp register is available (possibly spilled), move blocks of memory.
6208 for (int i = 0; i < number_of_words; i++) {
6209 __ movl(temp_reg, Address(ESP, src + stack_offset));
6210 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6211 stack_offset += kX86WordSize;
6212 }
6213 }
6214
6215 void ParallelMoveResolverX86::EmitMove(size_t index) {
6216 MoveOperands* move = moves_[index];
6217 Location source = move->GetSource();
6218 Location destination = move->GetDestination();
6219
6220 if (source.IsRegister()) {
6221 if (destination.IsRegister()) {
6222 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6223 } else if (destination.IsFpuRegister()) {
6224 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6225 } else {
6226 DCHECK(destination.IsStackSlot());
6227 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6228 }
6229 } else if (source.IsRegisterPair()) {
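    // A 64-bit value held in a GPR pair is staged through a temporary stack slot
    // (two 32-bit stores) and then loaded into the XMM destination with movsd.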
6230 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6231 // Create stack space for 2 elements.
6232 __ subl(ESP, Immediate(2 * elem_size));
6233 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
6234 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
6235 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6236 // And remove the temporary stack space we allocated.
6237 __ addl(ESP, Immediate(2 * elem_size));
6238 } else if (source.IsFpuRegister()) {
6239 if (destination.IsRegister()) {
6240 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6241 } else if (destination.IsFpuRegister()) {
6242 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6243 } else if (destination.IsRegisterPair()) {
6244 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
6245 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
6246 __ psrlq(src_reg, Immediate(32));
6247 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
6248 } else if (destination.IsStackSlot()) {
6249 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6250 } else if (destination.IsDoubleStackSlot()) {
6251 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6252 } else {
6253 DCHECK(destination.IsSIMDStackSlot());
6254 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6255 }
6256 } else if (source.IsStackSlot()) {
6257 if (destination.IsRegister()) {
6258 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6259 } else if (destination.IsFpuRegister()) {
6260 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6261 } else {
6262 DCHECK(destination.IsStackSlot());
6263 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6264 }
6265 } else if (source.IsDoubleStackSlot()) {
6266 if (destination.IsRegisterPair()) {
6267 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6268 __ movl(destination.AsRegisterPairHigh<Register>(),
6269 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6270 } else if (destination.IsFpuRegister()) {
6271 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6272 } else {
6273 DCHECK(destination.IsDoubleStackSlot()) << destination;
6274 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6275 }
6276 } else if (source.IsSIMDStackSlot()) {
6277 if (destination.IsFpuRegister()) {
6278 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6279 } else {
6280 DCHECK(destination.IsSIMDStackSlot());
6281 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6282 }
6283 } else if (source.IsConstant()) {
6284 HConstant* constant = source.GetConstant();
6285 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6286 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6287 if (destination.IsRegister()) {
6288 if (value == 0) {
6289 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6290 } else {
6291 __ movl(destination.AsRegister<Register>(), Immediate(value));
6292 }
6293 } else {
6294 DCHECK(destination.IsStackSlot()) << destination;
6295 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6296 }
6297 } else if (constant->IsFloatConstant()) {
6298 float fp_value = constant->AsFloatConstant()->GetValue();
6299 int32_t value = bit_cast<int32_t, float>(fp_value);
6300 Immediate imm(value);
6301 if (destination.IsFpuRegister()) {
6302 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6303 if (value == 0) {
6304 // Easy handling of 0.0.
6305 __ xorps(dest, dest);
6306 } else {
6307 ScratchRegisterScope ensure_scratch(
6308 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6309 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6310 __ movl(temp, Immediate(value));
6311 __ movd(dest, temp);
6312 }
6313 } else {
6314 DCHECK(destination.IsStackSlot()) << destination;
6315 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6316 }
6317 } else if (constant->IsLongConstant()) {
6318 int64_t value = constant->AsLongConstant()->GetValue();
6319 int32_t low_value = Low32Bits(value);
6320 int32_t high_value = High32Bits(value);
6321 Immediate low(low_value);
6322 Immediate high(high_value);
6323 if (destination.IsDoubleStackSlot()) {
6324 __ movl(Address(ESP, destination.GetStackIndex()), low);
6325 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6326 } else {
6327 __ movl(destination.AsRegisterPairLow<Register>(), low);
6328 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6329 }
6330 } else {
6331 DCHECK(constant->IsDoubleConstant());
6332 double dbl_value = constant->AsDoubleConstant()->GetValue();
6333 int64_t value = bit_cast<int64_t, double>(dbl_value);
6334 int32_t low_value = Low32Bits(value);
6335 int32_t high_value = High32Bits(value);
6336 Immediate low(low_value);
6337 Immediate high(high_value);
6338 if (destination.IsFpuRegister()) {
6339 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6340 if (value == 0) {
6341 // Easy handling of 0.0.
6342 __ xorpd(dest, dest);
6343 } else {
6344 __ pushl(high);
6345 __ pushl(low);
6346 __ movsd(dest, Address(ESP, 0));
6347 __ addl(ESP, Immediate(8));
6348 }
6349 } else {
6350 DCHECK(destination.IsDoubleStackSlot()) << destination;
6351 __ movl(Address(ESP, destination.GetStackIndex()), low);
6352 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6353 }
6354 }
6355 } else {
6356 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6357 }
6358 }
6359
6360 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6361 Register suggested_scratch = reg == EAX ? EBX : EAX;
6362 ScratchRegisterScope ensure_scratch(
6363 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6364
6365 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6366 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6367 __ movl(Address(ESP, mem + stack_offset), reg);
6368 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6369 }
6370
6371 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6372 ScratchRegisterScope ensure_scratch(
6373 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6374
6375 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6376 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6377 __ movl(temp_reg, Address(ESP, mem + stack_offset));
6378 __ movss(Address(ESP, mem + stack_offset), reg);
6379 __ movd(reg, temp_reg);
6380 }
6381
6382 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
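  // Swap a 128-bit XMM register with a 16-byte stack slot: spill the register to
  // freshly reserved stack space, exchange that space with the target slot word
  // by word, then reload the register and release the space.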
6383 size_t extra_slot = 4 * kX86WordSize;
6384 __ subl(ESP, Immediate(extra_slot));
6385 __ movups(Address(ESP, 0), XmmRegister(reg));
6386 ExchangeMemory(0, mem + extra_slot, 4);
6387 __ movups(XmmRegister(reg), Address(ESP, 0));
6388 __ addl(ESP, Immediate(extra_slot));
6389 }
6390
6391 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6392 ScratchRegisterScope ensure_scratch1(
6393 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6394
6395 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6396 ScratchRegisterScope ensure_scratch2(
6397 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6398
6399 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6400 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6401
6402 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6403 for (int i = 0; i < number_of_words; i++) {
6404 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6405 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6406 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6407 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6408 stack_offset += kX86WordSize;
6409 }
6410 }
6411
6412 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6413 MoveOperands* move = moves_[index];
6414 Location source = move->GetSource();
6415 Location destination = move->GetDestination();
6416
6417 if (source.IsRegister() && destination.IsRegister()) {
6418 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
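    // The registers must be distinct: xor-swapping a register with itself would
    // zero it, hence the DCHECK below.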
6419 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6420 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6421 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6422 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6423 } else if (source.IsRegister() && destination.IsStackSlot()) {
6424 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6425 } else if (source.IsStackSlot() && destination.IsRegister()) {
6426 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6427 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6428 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6429 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6430 // Use XOR Swap algorithm to avoid a temporary.
6431 DCHECK_NE(source.reg(), destination.reg());
6432 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6433 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6434 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6435 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6436 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6437 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6438 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6439 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6440 // Take advantage of the 16 bytes in the XMM register.
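    // The XMM register briefly holds both values (the original double in the low
    // quadword, the in-memory double in the high quadword), so the swap needs no
    // scratch GPR.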
6441 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6442 Address stack(ESP, destination.GetStackIndex());
6443 // Load the double into the high doubleword.
6444 __ movhpd(reg, stack);
6445
6446 // Store the low double into the destination.
6447 __ movsd(stack, reg);
6448
6449 // Move the high double to the low double.
6450 __ psrldq(reg, Immediate(8));
6451 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6452 // Take advantage of the 16 bytes in the XMM register.
6453 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6454 Address stack(ESP, source.GetStackIndex());
6455 // Load the double into the high doubleword.
6456 __ movhpd(reg, stack);
6457
6458 // Store the low double into the destination.
6459 __ movsd(stack, reg);
6460
6461 // Move the high double to the low double.
6462 __ psrldq(reg, Immediate(8));
6463 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6464 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6465 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6466 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6467 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6468 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6469 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6470 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6471 } else {
6472 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6473 }
6474 }
6475
6476 void ParallelMoveResolverX86::SpillScratch(int reg) {
6477 __ pushl(static_cast<Register>(reg));
6478 }
6479
6480 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6481 __ popl(static_cast<Register>(reg));
6482 }
6483
6484 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6485 HLoadClass::LoadKind desired_class_load_kind) {
6486 switch (desired_class_load_kind) {
6487 case HLoadClass::LoadKind::kInvalid:
6488 LOG(FATAL) << "UNREACHABLE";
6489 UNREACHABLE();
6490 case HLoadClass::LoadKind::kReferrersClass:
6491 break;
6492 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6493 case HLoadClass::LoadKind::kBootImageRelRo:
6494 case HLoadClass::LoadKind::kBssEntry:
6495 DCHECK(!Runtime::Current()->UseJitCompilation());
6496 break;
6497 case HLoadClass::LoadKind::kJitBootImageAddress:
6498 case HLoadClass::LoadKind::kJitTableAddress:
6499 DCHECK(Runtime::Current()->UseJitCompilation());
6500 break;
6501 case HLoadClass::LoadKind::kRuntimeCall:
6502 break;
6503 }
6504 return desired_class_load_kind;
6505 }
6506
6507 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6508 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6509 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6510 InvokeRuntimeCallingConvention calling_convention;
6511 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6512 cls,
6513 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6514 Location::RegisterLocation(EAX));
6515 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6516 return;
6517 }
6518 DCHECK(!cls->NeedsAccessCheck());
6519
6520 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6521 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6522 ? LocationSummary::kCallOnSlowPath
6523 : LocationSummary::kNoCall;
6524 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6525 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6526 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6527 }
6528
6529 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6530 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6531 load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
6532 load_kind == HLoadClass::LoadKind::kBssEntry) {
6533 locations->SetInAt(0, Location::RequiresRegister());
6534 }
6535 locations->SetOut(Location::RequiresRegister());
6536 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6537 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6538 // Rely on the type resolution and/or initialization to save everything.
6539 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6540 } else {
6541 // For non-Baker read barrier we have a temp-clobbering call.
6542 }
6543 }
6544 }
6545
6546 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6547 dex::TypeIndex type_index,
6548 Handle<mirror::Class> handle) {
6549 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6550 // Add a patch entry and return the label.
6551 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6552 PatchInfo<Label>* info = &jit_class_patches_.back();
6553 return &info->label;
6554 }
6555
6556 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6557 // move.
6558 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6559 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6560 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6561 codegen_->GenerateLoadClassRuntimeCall(cls);
6562 return;
6563 }
6564 DCHECK(!cls->NeedsAccessCheck());
6565
6566 LocationSummary* locations = cls->GetLocations();
6567 Location out_loc = locations->Out();
6568 Register out = out_loc.AsRegister<Register>();
6569
6570 bool generate_null_check = false;
6571 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6572 ? kWithoutReadBarrier
6573 : kCompilerReadBarrierOption;
6574 switch (load_kind) {
6575 case HLoadClass::LoadKind::kReferrersClass: {
6576 DCHECK(!cls->CanCallRuntime());
6577 DCHECK(!cls->MustGenerateClinitCheck());
6578 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6579 Register current_method = locations->InAt(0).AsRegister<Register>();
6580 GenerateGcRootFieldLoad(
6581 cls,
6582 out_loc,
6583 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6584 /* fixup_label= */ nullptr,
6585 read_barrier_option);
6586 break;
6587 }
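    // The PC-relative load kinds below emit kDummy32BitOffset as a placeholder
    // displacement from the base computed by X86ComputeBaseMethodAddress; a patch
    // is recorded so the displacement can be fixed up later.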
6588 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6589 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6590 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6591 Register method_address = locations->InAt(0).AsRegister<Register>();
6592 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6593 codegen_->RecordBootImageTypePatch(cls);
6594 break;
6595 }
6596 case HLoadClass::LoadKind::kBootImageRelRo: {
6597 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6598 Register method_address = locations->InAt(0).AsRegister<Register>();
6599 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6600 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6601 codegen_->GetBootImageOffset(cls));
6602 break;
6603 }
6604 case HLoadClass::LoadKind::kBssEntry: {
6605 Register method_address = locations->InAt(0).AsRegister<Register>();
6606 Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6607 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6608 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6609 generate_null_check = true;
6610 break;
6611 }
6612 case HLoadClass::LoadKind::kJitBootImageAddress: {
6613 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6614 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6615 DCHECK_NE(address, 0u);
6616 __ movl(out, Immediate(address));
6617 break;
6618 }
6619 case HLoadClass::LoadKind::kJitTableAddress: {
6620 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6621 Label* fixup_label = codegen_->NewJitRootClassPatch(
6622 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6623 // /* GcRoot<mirror::Class> */ out = *address
6624 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6625 break;
6626 }
6627 case HLoadClass::LoadKind::kRuntimeCall:
6628 case HLoadClass::LoadKind::kInvalid:
6629 LOG(FATAL) << "UNREACHABLE";
6630 UNREACHABLE();
6631 }
6632
6633 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6634 DCHECK(cls->CanCallRuntime());
6635 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
6636 codegen_->AddSlowPath(slow_path);
6637
6638 if (generate_null_check) {
6639 __ testl(out, out);
6640 __ j(kEqual, slow_path->GetEntryLabel());
6641 }
6642
6643 if (cls->MustGenerateClinitCheck()) {
6644 GenerateClassInitializationCheck(slow_path, out);
6645 } else {
6646 __ Bind(slow_path->GetExitLabel());
6647 }
6648 }
6649 }
6650
6651 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6652 InvokeRuntimeCallingConvention calling_convention;
6653 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6654 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6655 }
6656
6657 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6658 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6659 }
6660
6661 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
6662 InvokeRuntimeCallingConvention calling_convention;
6663 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6664 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6665 }
6666
6667 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
6668 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6669 }
6670
6671 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6672 LocationSummary* locations =
6673 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6674 locations->SetInAt(0, Location::RequiresRegister());
6675 if (check->HasUses()) {
6676 locations->SetOut(Location::SameAsFirstInput());
6677 }
6678 // Rely on the type initialization to save everything we need.
6679 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6680 }
6681
6682 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6683   // We assume the class is not null.
6684 SlowPathCode* slow_path =
6685 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
6686 codegen_->AddSlowPath(slow_path);
6687 GenerateClassInitializationCheck(slow_path,
6688 check->GetLocations()->InAt(0).AsRegister<Register>());
6689 }
6690
6691 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6692 SlowPathCode* slow_path, Register class_reg) {
6693 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6694 const size_t status_byte_offset =
6695 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6696 constexpr uint32_t shifted_initialized_value =
6697 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
6698
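  // Compare only the byte that holds the class status (it sits above the
  // SubtypeCheck bitstring within the 32-bit status field). Any status at or
  // above kInitialized passes; anything below takes the initialization slow path.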
6699 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
6700 __ j(kBelow, slow_path->GetEntryLabel());
6701 __ Bind(slow_path->GetExitLabel());
6702 // No need for memory fence, thanks to the X86 memory model.
6703 }
6704
6705 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6706 Register temp) {
6707 uint32_t path_to_root = check->GetBitstringPathToRoot();
6708 uint32_t mask = check->GetBitstringMask();
6709 DCHECK(IsPowerOfTwo(mask + 1));
6710 size_t mask_bits = WhichPowerOf2(mask + 1);
6711
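  // The check reduces to testing whether the low `mask_bits` of the class status
  // word equal `path_to_root`. With a 16-bit mask this is a single cmpw against
  // memory; otherwise the bits are compared by subtracting `path_to_root` and
  // shifting out everything above the mask, leaving the caller to branch on the
  // zero flag.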
6712 if (mask_bits == 16u) {
6713 // Compare the bitstring in memory.
6714 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6715 } else {
6716 // /* uint32_t */ temp = temp->status_
6717 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6718 // Compare the bitstring bits using SUB.
6719 __ subl(temp, Immediate(path_to_root));
6720 // Shift out bits that do not contribute to the comparison.
6721 __ shll(temp, Immediate(32u - mask_bits));
6722 }
6723 }
6724
6725 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6726 HLoadString::LoadKind desired_string_load_kind) {
6727 switch (desired_string_load_kind) {
6728 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6729 case HLoadString::LoadKind::kBootImageRelRo:
6730 case HLoadString::LoadKind::kBssEntry:
6731 DCHECK(!Runtime::Current()->UseJitCompilation());
6732 break;
6733 case HLoadString::LoadKind::kJitBootImageAddress:
6734 case HLoadString::LoadKind::kJitTableAddress:
6735 DCHECK(Runtime::Current()->UseJitCompilation());
6736 break;
6737 case HLoadString::LoadKind::kRuntimeCall:
6738 break;
6739 }
6740 return desired_string_load_kind;
6741 }
6742
6743 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6744 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6745 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6746 HLoadString::LoadKind load_kind = load->GetLoadKind();
6747 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6748 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
6749 load_kind == HLoadString::LoadKind::kBssEntry) {
6750 locations->SetInAt(0, Location::RequiresRegister());
6751 }
6752 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
6753 locations->SetOut(Location::RegisterLocation(EAX));
6754 } else {
6755 locations->SetOut(Location::RequiresRegister());
6756 if (load_kind == HLoadString::LoadKind::kBssEntry) {
6757 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6758 // Rely on the pResolveString to save everything.
6759 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6760 } else {
6761 // For non-Baker read barrier we have a temp-clobbering call.
6762 }
6763 }
6764 }
6765 }
6766
6767 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6768 dex::StringIndex string_index,
6769 Handle<mirror::String> handle) {
6770 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6771 // Add a patch entry and return the label.
6772 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6773 PatchInfo<Label>* info = &jit_string_patches_.back();
6774 return &info->label;
6775 }
6776
6777 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6778 // move.
6779 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6780 LocationSummary* locations = load->GetLocations();
6781 Location out_loc = locations->Out();
6782 Register out = out_loc.AsRegister<Register>();
6783
6784 switch (load->GetLoadKind()) {
6785 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6786 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6787 Register method_address = locations->InAt(0).AsRegister<Register>();
6788 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6789 codegen_->RecordBootImageStringPatch(load);
6790 return;
6791 }
6792 case HLoadString::LoadKind::kBootImageRelRo: {
6793 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6794 Register method_address = locations->InAt(0).AsRegister<Register>();
6795 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6796 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6797 codegen_->GetBootImageOffset(load));
6798 return;
6799 }
6800 case HLoadString::LoadKind::kBssEntry: {
6801 Register method_address = locations->InAt(0).AsRegister<Register>();
6802 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6803 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6804 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6805 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6806 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
6807 codegen_->AddSlowPath(slow_path);
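      // The .bss slot starts out null; a null value loaded here means the String has not
      // been resolved yet, so the slow path calls the runtime, which resolves the string
      // and fills the slot for later executions.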
6808 __ testl(out, out);
6809 __ j(kEqual, slow_path->GetEntryLabel());
6810 __ Bind(slow_path->GetExitLabel());
6811 return;
6812 }
6813 case HLoadString::LoadKind::kJitBootImageAddress: {
6814 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6815 DCHECK_NE(address, 0u);
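      // Boot image objects are never moved by the GC, so the string's address can be
      // embedded directly as a 32-bit immediate.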
6816 __ movl(out, Immediate(address));
6817 return;
6818 }
6819 case HLoadString::LoadKind::kJitTableAddress: {
6820 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6821 Label* fixup_label = codegen_->NewJitRootStringPatch(
6822 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6823 // /* GcRoot<mirror::String> */ out = *address
6824 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6825 return;
6826 }
6827 default:
6828 break;
6829 }
6830
6831   // TODO: Re-add the compiler code that performs the string dex cache lookup.
6832 InvokeRuntimeCallingConvention calling_convention;
6833 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6834 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6835 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6836 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6837 }
6838
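// On x86, Thread::Current() is reached through the FS segment register, so thread-local
// fields such as the pending exception are accessed with fs-relative addressing below.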
6839 static Address GetExceptionTlsAddress() {
6840 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6841 }
6842
6843 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6844 LocationSummary* locations =
6845 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6846 locations->SetOut(Location::RequiresRegister());
6847 }
6848
6849 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6850 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6851 }
6852
6853 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6854 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6855 }
6856
6857 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6858 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
6859 }
6860
6861 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
6862 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6863 instruction, LocationSummary::kCallOnMainOnly);
6864 InvokeRuntimeCallingConvention calling_convention;
6865 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6866 }
6867
6868 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
6869 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6870 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6871 }
6872
6873 // Temp is used for read barrier.
6874 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6875 if (kEmitCompilerReadBarrier &&
6876 !kUseBakerReadBarrier &&
6877 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6878 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6879 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6880 return 1;
6881 }
6882 return 0;
6883 }
6884
6885 // The interface case has 2 temps: one holds the number of interfaces and one the current
6886 // interface pointer; the current interface is compared in memory.
6887 // The other checks have one temp for loading the object's class.
6888 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6889 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6890 return 2;
6891 }
6892 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6893 }
6894
6895 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
6896 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6897 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6898 bool baker_read_barrier_slow_path = false;
6899 switch (type_check_kind) {
6900 case TypeCheckKind::kExactCheck:
6901 case TypeCheckKind::kAbstractClassCheck:
6902 case TypeCheckKind::kClassHierarchyCheck:
6903 case TypeCheckKind::kArrayObjectCheck: {
6904 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6905 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6906 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6907 break;
6908 }
6909 case TypeCheckKind::kArrayCheck:
6910 case TypeCheckKind::kUnresolvedCheck:
6911 case TypeCheckKind::kInterfaceCheck:
6912 call_kind = LocationSummary::kCallOnSlowPath;
6913 break;
6914 case TypeCheckKind::kBitstringCheck:
6915 break;
6916 }
6917
6918 LocationSummary* locations =
6919 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6920 if (baker_read_barrier_slow_path) {
6921 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6922 }
6923 locations->SetInAt(0, Location::RequiresRegister());
6924 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6925 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6926 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6927 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6928 } else {
6929 locations->SetInAt(1, Location::Any());
6930 }
6931 // Note that TypeCheckSlowPathX86 uses this "out" register too.
6932 locations->SetOut(Location::RequiresRegister());
6933 // When read barriers are enabled, we need a temporary register for some cases.
6934 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6935 }
6936
6937 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
6938 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6939 LocationSummary* locations = instruction->GetLocations();
6940 Location obj_loc = locations->InAt(0);
6941 Register obj = obj_loc.AsRegister<Register>();
6942 Location cls = locations->InAt(1);
6943 Location out_loc = locations->Out();
6944 Register out = out_loc.AsRegister<Register>();
6945 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6946 DCHECK_LE(num_temps, 1u);
6947 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
6948 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6949 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6950 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6951 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6952 SlowPathCode* slow_path = nullptr;
6953 NearLabel done, zero;
6954
6955 // Return 0 if `obj` is null.
6956 // Avoid null check if we know obj is not null.
6957 if (instruction->MustDoNullCheck()) {
6958 __ testl(obj, obj);
6959 __ j(kEqual, &zero);
6960 }
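  // Each case below computes the boolean result in `out`: it is set to 1 on success and
  // cleared on failure, either by jumping to `zero`, by reusing a null class pointer
  // already in `out`, or through the slow path.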
6961
6962 switch (type_check_kind) {
6963 case TypeCheckKind::kExactCheck: {
6964 ReadBarrierOption read_barrier_option =
6965 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6966 // /* HeapReference<Class> */ out = obj->klass_
6967 GenerateReferenceLoadTwoRegisters(instruction,
6968 out_loc,
6969 obj_loc,
6970 class_offset,
6971 read_barrier_option);
6972 if (cls.IsRegister()) {
6973 __ cmpl(out, cls.AsRegister<Register>());
6974 } else {
6975 DCHECK(cls.IsStackSlot()) << cls;
6976 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6977 }
6978
6979 // Classes must be equal for the instanceof to succeed.
6980 __ j(kNotEqual, &zero);
6981 __ movl(out, Immediate(1));
6982 __ jmp(&done);
6983 break;
6984 }
6985
6986 case TypeCheckKind::kAbstractClassCheck: {
6987 ReadBarrierOption read_barrier_option =
6988 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6989 // /* HeapReference<Class> */ out = obj->klass_
6990 GenerateReferenceLoadTwoRegisters(instruction,
6991 out_loc,
6992 obj_loc,
6993 class_offset,
6994 read_barrier_option);
6995 // If the class is abstract, we eagerly fetch the super class of the
6996 // object to avoid doing a comparison we know will fail.
6997 NearLabel loop;
6998 __ Bind(&loop);
6999 // /* HeapReference<Class> */ out = out->super_class_
7000 GenerateReferenceLoadOneRegister(instruction,
7001 out_loc,
7002 super_offset,
7003 maybe_temp_loc,
7004 read_barrier_option);
7005 __ testl(out, out);
7006 // If `out` is null, we use it for the result, and jump to `done`.
7007 __ j(kEqual, &done);
7008 if (cls.IsRegister()) {
7009 __ cmpl(out, cls.AsRegister<Register>());
7010 } else {
7011 DCHECK(cls.IsStackSlot()) << cls;
7012 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7013 }
7014 __ j(kNotEqual, &loop);
7015 __ movl(out, Immediate(1));
7016 if (zero.IsLinked()) {
7017 __ jmp(&done);
7018 }
7019 break;
7020 }
7021
7022 case TypeCheckKind::kClassHierarchyCheck: {
7023 ReadBarrierOption read_barrier_option =
7024 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7025 // /* HeapReference<Class> */ out = obj->klass_
7026 GenerateReferenceLoadTwoRegisters(instruction,
7027 out_loc,
7028 obj_loc,
7029 class_offset,
7030 read_barrier_option);
7031 // Walk over the class hierarchy to find a match.
7032 NearLabel loop, success;
7033 __ Bind(&loop);
7034 if (cls.IsRegister()) {
7035 __ cmpl(out, cls.AsRegister<Register>());
7036 } else {
7037 DCHECK(cls.IsStackSlot()) << cls;
7038 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7039 }
7040 __ j(kEqual, &success);
7041 // /* HeapReference<Class> */ out = out->super_class_
7042 GenerateReferenceLoadOneRegister(instruction,
7043 out_loc,
7044 super_offset,
7045 maybe_temp_loc,
7046 read_barrier_option);
7047 __ testl(out, out);
7048 __ j(kNotEqual, &loop);
7049 // If `out` is null, we use it for the result, and jump to `done`.
7050 __ jmp(&done);
7051 __ Bind(&success);
7052 __ movl(out, Immediate(1));
7053 if (zero.IsLinked()) {
7054 __ jmp(&done);
7055 }
7056 break;
7057 }
7058
7059 case TypeCheckKind::kArrayObjectCheck: {
7060 ReadBarrierOption read_barrier_option =
7061 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7062 // /* HeapReference<Class> */ out = obj->klass_
7063 GenerateReferenceLoadTwoRegisters(instruction,
7064 out_loc,
7065 obj_loc,
7066 class_offset,
7067 read_barrier_option);
7068 // Do an exact check.
7069 NearLabel exact_check;
7070 if (cls.IsRegister()) {
7071 __ cmpl(out, cls.AsRegister<Register>());
7072 } else {
7073 DCHECK(cls.IsStackSlot()) << cls;
7074 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7075 }
7076 __ j(kEqual, &exact_check);
7077 // Otherwise, we need to check that the object's class is a non-primitive array.
7078 // /* HeapReference<Class> */ out = out->component_type_
7079 GenerateReferenceLoadOneRegister(instruction,
7080 out_loc,
7081 component_offset,
7082 maybe_temp_loc,
7083 read_barrier_option);
7084 __ testl(out, out);
7085 // If `out` is null, we use it for the result, and jump to `done`.
7086 __ j(kEqual, &done);
7087 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7088 __ j(kNotEqual, &zero);
7089 __ Bind(&exact_check);
7090 __ movl(out, Immediate(1));
7091 __ jmp(&done);
7092 break;
7093 }
7094
7095 case TypeCheckKind::kArrayCheck: {
7096 // No read barrier since the slow path will retry upon failure.
7097 // /* HeapReference<Class> */ out = obj->klass_
7098 GenerateReferenceLoadTwoRegisters(instruction,
7099 out_loc,
7100 obj_loc,
7101 class_offset,
7102 kWithoutReadBarrier);
7103 if (cls.IsRegister()) {
7104 __ cmpl(out, cls.AsRegister<Register>());
7105 } else {
7106 DCHECK(cls.IsStackSlot()) << cls;
7107 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7108 }
7109 DCHECK(locations->OnlyCallsOnSlowPath());
7110 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7111 instruction, /* is_fatal= */ false);
7112 codegen_->AddSlowPath(slow_path);
7113 __ j(kNotEqual, slow_path->GetEntryLabel());
7114 __ movl(out, Immediate(1));
7115 if (zero.IsLinked()) {
7116 __ jmp(&done);
7117 }
7118 break;
7119 }
7120
7121 case TypeCheckKind::kUnresolvedCheck:
7122 case TypeCheckKind::kInterfaceCheck: {
7123 // Note that we indeed only call on slow path, but we always go
7124 // into the slow path for the unresolved and interface check
7125 // cases.
7126 //
7127 // We cannot directly call the InstanceofNonTrivial runtime
7128 // entry point without resorting to a type checking slow path
7129 // here (i.e. by calling InvokeRuntime directly), as it would
7130       // require assigning fixed registers for the inputs of this
7131 // HInstanceOf instruction (following the runtime calling
7132 // convention), which might be cluttered by the potential first
7133 // read barrier emission at the beginning of this method.
7134 //
7135 // TODO: Introduce a new runtime entry point taking the object
7136 // to test (instead of its class) as argument, and let it deal
7137 // with the read barrier issues. This will let us refactor this
7138 // case of the `switch` code as it was previously (with a direct
7139 // call to the runtime not using a type checking slow path).
7140 // This should also be beneficial for the other cases above.
7141 DCHECK(locations->OnlyCallsOnSlowPath());
7142 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7143 instruction, /* is_fatal= */ false);
7144 codegen_->AddSlowPath(slow_path);
7145 __ jmp(slow_path->GetEntryLabel());
7146 if (zero.IsLinked()) {
7147 __ jmp(&done);
7148 }
7149 break;
7150 }
7151
7152 case TypeCheckKind::kBitstringCheck: {
7153       // /* HeapReference<Class> */ out = obj->klass_
7154 GenerateReferenceLoadTwoRegisters(instruction,
7155 out_loc,
7156 obj_loc,
7157 class_offset,
7158 kWithoutReadBarrier);
7159
7160 GenerateBitstringTypeCheckCompare(instruction, out);
7161 __ j(kNotEqual, &zero);
7162 __ movl(out, Immediate(1));
7163 __ jmp(&done);
7164 break;
7165 }
7166 }
7167
7168 if (zero.IsLinked()) {
7169 __ Bind(&zero);
7170 __ xorl(out, out);
7171 }
7172
7173 if (done.IsLinked()) {
7174 __ Bind(&done);
7175 }
7176
7177 if (slow_path != nullptr) {
7178 __ Bind(slow_path->GetExitLabel());
7179 }
7180 }
7181
7182 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7183 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7184 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7185 LocationSummary* locations =
7186 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7187 locations->SetInAt(0, Location::RequiresRegister());
7188 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7189 // Require a register for the interface check since there is a loop that compares the class to
7190 // a memory address.
7191 locations->SetInAt(1, Location::RequiresRegister());
7192 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7193 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7194 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7195 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7196 } else {
7197 locations->SetInAt(1, Location::Any());
7198 }
7199 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7200 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7201 }
7202
7203 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7204 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7205 LocationSummary* locations = instruction->GetLocations();
7206 Location obj_loc = locations->InAt(0);
7207 Register obj = obj_loc.AsRegister<Register>();
7208 Location cls = locations->InAt(1);
7209 Location temp_loc = locations->GetTemp(0);
7210 Register temp = temp_loc.AsRegister<Register>();
7211 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7212 DCHECK_GE(num_temps, 1u);
7213 DCHECK_LE(num_temps, 2u);
7214 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7215 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7216 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7217 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7218 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7219 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7220 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7221 const uint32_t object_array_data_offset =
7222 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7223
7224 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7225 SlowPathCode* type_check_slow_path =
7226 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7227 instruction, is_type_check_slow_path_fatal);
7228 codegen_->AddSlowPath(type_check_slow_path);
7229
7230 NearLabel done;
7231 // Avoid null check if we know obj is not null.
7232 if (instruction->MustDoNullCheck()) {
7233 __ testl(obj, obj);
7234 __ j(kEqual, &done);
7235 }
7236
7237 switch (type_check_kind) {
7238 case TypeCheckKind::kExactCheck:
7239 case TypeCheckKind::kArrayCheck: {
7240 // /* HeapReference<Class> */ temp = obj->klass_
7241 GenerateReferenceLoadTwoRegisters(instruction,
7242 temp_loc,
7243 obj_loc,
7244 class_offset,
7245 kWithoutReadBarrier);
7246
7247 if (cls.IsRegister()) {
7248 __ cmpl(temp, cls.AsRegister<Register>());
7249 } else {
7250 DCHECK(cls.IsStackSlot()) << cls;
7251 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7252 }
7253 // Jump to slow path for throwing the exception or doing a
7254 // more involved array check.
7255 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7256 break;
7257 }
7258
7259 case TypeCheckKind::kAbstractClassCheck: {
7260 // /* HeapReference<Class> */ temp = obj->klass_
7261 GenerateReferenceLoadTwoRegisters(instruction,
7262 temp_loc,
7263 obj_loc,
7264 class_offset,
7265 kWithoutReadBarrier);
7266
7267 // If the class is abstract, we eagerly fetch the super class of the
7268 // object to avoid doing a comparison we know will fail.
7269 NearLabel loop;
7270 __ Bind(&loop);
7271 // /* HeapReference<Class> */ temp = temp->super_class_
7272 GenerateReferenceLoadOneRegister(instruction,
7273 temp_loc,
7274 super_offset,
7275 maybe_temp2_loc,
7276 kWithoutReadBarrier);
7277
7278 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7279 // exception.
7280 __ testl(temp, temp);
7281 __ j(kZero, type_check_slow_path->GetEntryLabel());
7282
7283 // Otherwise, compare the classes
7284 if (cls.IsRegister()) {
7285 __ cmpl(temp, cls.AsRegister<Register>());
7286 } else {
7287 DCHECK(cls.IsStackSlot()) << cls;
7288 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7289 }
7290 __ j(kNotEqual, &loop);
7291 break;
7292 }
7293
7294 case TypeCheckKind::kClassHierarchyCheck: {
7295 // /* HeapReference<Class> */ temp = obj->klass_
7296 GenerateReferenceLoadTwoRegisters(instruction,
7297 temp_loc,
7298 obj_loc,
7299 class_offset,
7300 kWithoutReadBarrier);
7301
7302 // Walk over the class hierarchy to find a match.
7303 NearLabel loop;
7304 __ Bind(&loop);
7305 if (cls.IsRegister()) {
7306 __ cmpl(temp, cls.AsRegister<Register>());
7307 } else {
7308 DCHECK(cls.IsStackSlot()) << cls;
7309 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7310 }
7311 __ j(kEqual, &done);
7312
7313 // /* HeapReference<Class> */ temp = temp->super_class_
7314 GenerateReferenceLoadOneRegister(instruction,
7315 temp_loc,
7316 super_offset,
7317 maybe_temp2_loc,
7318 kWithoutReadBarrier);
7319
7320 // If the class reference currently in `temp` is not null, jump
7321       // back to the beginning of the loop.
7322 __ testl(temp, temp);
7323 __ j(kNotZero, &loop);
7324       // Otherwise, jump to the slow path to throw the exception.
7325 __ jmp(type_check_slow_path->GetEntryLabel());
7326 break;
7327 }
7328
7329 case TypeCheckKind::kArrayObjectCheck: {
7330 // /* HeapReference<Class> */ temp = obj->klass_
7331 GenerateReferenceLoadTwoRegisters(instruction,
7332 temp_loc,
7333 obj_loc,
7334 class_offset,
7335 kWithoutReadBarrier);
7336
7337 // Do an exact check.
7338 if (cls.IsRegister()) {
7339 __ cmpl(temp, cls.AsRegister<Register>());
7340 } else {
7341 DCHECK(cls.IsStackSlot()) << cls;
7342 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7343 }
7344 __ j(kEqual, &done);
7345
7346 // Otherwise, we need to check that the object's class is a non-primitive array.
7347 // /* HeapReference<Class> */ temp = temp->component_type_
7348 GenerateReferenceLoadOneRegister(instruction,
7349 temp_loc,
7350 component_offset,
7351 maybe_temp2_loc,
7352 kWithoutReadBarrier);
7353
7354       // If the component type is null (i.e. the object is not an array), jump to the slow path to
7355 // throw the exception. Otherwise proceed with the check.
7356 __ testl(temp, temp);
7357 __ j(kZero, type_check_slow_path->GetEntryLabel());
7358
7359 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7360 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7361 break;
7362 }
7363
7364 case TypeCheckKind::kUnresolvedCheck:
7365 // We always go into the type check slow path for the unresolved check case.
7366       // We cannot directly call the CheckCast runtime entry point
7367       // without resorting to a type checking slow path here (i.e. by
7368       // calling InvokeRuntime directly), as it would require assigning
7369       // fixed registers for the inputs of this HCheckCast
7370       // instruction (following the runtime calling convention), which
7371       // might be cluttered by the potential first read barrier
7372       // emission at the beginning of this method.
7373 __ jmp(type_check_slow_path->GetEntryLabel());
7374 break;
7375
7376 case TypeCheckKind::kInterfaceCheck: {
7377 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7378       // We cannot get false positives by doing this.
7379 // /* HeapReference<Class> */ temp = obj->klass_
7380 GenerateReferenceLoadTwoRegisters(instruction,
7381 temp_loc,
7382 obj_loc,
7383 class_offset,
7384 kWithoutReadBarrier);
7385
7386 // /* HeapReference<Class> */ temp = temp->iftable_
7387 GenerateReferenceLoadTwoRegisters(instruction,
7388 temp_loc,
7389 temp_loc,
7390 iftable_offset,
7391 kWithoutReadBarrier);
7392 // Iftable is never null.
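      // The iftable stores (interface class, method array) pairs back to back, so class
      // entries live at even indices; this is why the counter below is decremented by 2
      // per iteration.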
7393 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7394 // Maybe poison the `cls` for direct comparison with memory.
7395 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7396 // Loop through the iftable and check if any class matches.
7397 NearLabel start_loop;
7398 __ Bind(&start_loop);
7399 // Need to subtract first to handle the empty array case.
7400 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7401 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7402 // Go to next interface if the classes do not match.
7403 __ cmpl(cls.AsRegister<Register>(),
7404 CodeGeneratorX86::ArrayAddress(temp,
7405 maybe_temp2_loc,
7406 TIMES_4,
7407 object_array_data_offset));
7408 __ j(kNotEqual, &start_loop);
7409 // If `cls` was poisoned above, unpoison it.
7410 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
7411 break;
7412 }
7413
7414 case TypeCheckKind::kBitstringCheck: {
7415 // /* HeapReference<Class> */ temp = obj->klass_
7416 GenerateReferenceLoadTwoRegisters(instruction,
7417 temp_loc,
7418 obj_loc,
7419 class_offset,
7420 kWithoutReadBarrier);
7421
7422 GenerateBitstringTypeCheckCompare(instruction, temp);
7423 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7424 break;
7425 }
7426 }
7427 __ Bind(&done);
7428
7429 __ Bind(type_check_slow_path->GetExitLabel());
7430 }
7431
7432 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7433 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7434 instruction, LocationSummary::kCallOnMainOnly);
7435 InvokeRuntimeCallingConvention calling_convention;
7436 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7437 }
7438
7439 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7440 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7441 : kQuickUnlockObject,
7442 instruction,
7443 instruction->GetDexPc());
7444 if (instruction->IsEnter()) {
7445 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7446 } else {
7447 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7448 }
7449 }
7450
7451 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7452 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7453 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7454 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7455 locations->SetInAt(0, Location::RequiresRegister());
7456 locations->SetInAt(1, Location::RequiresRegister());
7457 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7458 }
7459
7460 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
7461 LocationSummary* locations = instruction->GetLocations();
7462 Location first = locations->InAt(0);
7463 Location second = locations->InAt(1);
7464 Location dest = locations->Out();
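  // `andn` computes the AND of the complement of its first source with its second source
  // in a single instruction. For 64-bit values it is applied separately to the low and
  // high register pairs, since x86-32 has no 64-bit general-purpose registers.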
7465 if (instruction->GetResultType() == DataType::Type::kInt32) {
7466 __ andn(dest.AsRegister<Register>(),
7467 first.AsRegister<Register>(),
7468 second.AsRegister<Register>());
7469 } else {
7470 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7471 __ andn(dest.AsRegisterPairLow<Register>(),
7472 first.AsRegisterPairLow<Register>(),
7473 second.AsRegisterPairLow<Register>());
7474 __ andn(dest.AsRegisterPairHigh<Register>(),
7475 first.AsRegisterPairHigh<Register>(),
7476 second.AsRegisterPairHigh<Register>());
7477 }
7478 }
7479
7480 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7481 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7482 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
7483 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7484 locations->SetInAt(0, Location::RequiresRegister());
7485 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7486 }
7487
7488 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
7489 HX86MaskOrResetLeastSetBit* instruction) {
7490 LocationSummary* locations = instruction->GetLocations();
7491 Location src = locations->InAt(0);
7492 Location dest = locations->Out();
7493 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
7494 switch (instruction->GetOpKind()) {
7495 case HInstruction::kAnd:
7496 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
7497 break;
7498 case HInstruction::kXor:
7499 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
7500 break;
7501 default:
7502 LOG(FATAL) << "Unreachable";
7503 }
7504 }
7505
7506 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7507 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7508 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7509
7510 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7511 LocationSummary* locations =
7512 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7513 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7514 || instruction->GetResultType() == DataType::Type::kInt64);
7515 locations->SetInAt(0, Location::RequiresRegister());
7516 locations->SetInAt(1, Location::Any());
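  // x86 integer ALU instructions are two-operand (the destination doubles as the first
  // source), so the output must share a register with the first input.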
7517 locations->SetOut(Location::SameAsFirstInput());
7518 }
7519
7520 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
7521 HandleBitwiseOperation(instruction);
7522 }
7523
7524 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
7525 HandleBitwiseOperation(instruction);
7526 }
7527
7528 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
7529 HandleBitwiseOperation(instruction);
7530 }
7531
7532 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7533 LocationSummary* locations = instruction->GetLocations();
7534 Location first = locations->InAt(0);
7535 Location second = locations->InAt(1);
7536 DCHECK(first.Equals(locations->Out()));
7537
7538 if (instruction->GetResultType() == DataType::Type::kInt32) {
7539 if (second.IsRegister()) {
7540 if (instruction->IsAnd()) {
7541 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
7542 } else if (instruction->IsOr()) {
7543 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
7544 } else {
7545 DCHECK(instruction->IsXor());
7546 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
7547 }
7548 } else if (second.IsConstant()) {
7549 if (instruction->IsAnd()) {
7550 __ andl(first.AsRegister<Register>(),
7551 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7552 } else if (instruction->IsOr()) {
7553 __ orl(first.AsRegister<Register>(),
7554 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7555 } else {
7556 DCHECK(instruction->IsXor());
7557 __ xorl(first.AsRegister<Register>(),
7558 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7559 }
7560 } else {
7561 if (instruction->IsAnd()) {
7562 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7563 } else if (instruction->IsOr()) {
7564 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7565 } else {
7566 DCHECK(instruction->IsXor());
7567 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7568 }
7569 }
7570 } else {
7571 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7572 if (second.IsRegisterPair()) {
7573 if (instruction->IsAnd()) {
7574 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7575 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7576 } else if (instruction->IsOr()) {
7577 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7578 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7579 } else {
7580 DCHECK(instruction->IsXor());
7581 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7582 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7583 }
7584 } else if (second.IsDoubleStackSlot()) {
7585 if (instruction->IsAnd()) {
7586 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7587 __ andl(first.AsRegisterPairHigh<Register>(),
7588 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7589 } else if (instruction->IsOr()) {
7590 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7591 __ orl(first.AsRegisterPairHigh<Register>(),
7592 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7593 } else {
7594 DCHECK(instruction->IsXor());
7595 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7596 __ xorl(first.AsRegisterPairHigh<Register>(),
7597 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7598 }
7599 } else {
7600 DCHECK(second.IsConstant()) << second;
7601 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7602 int32_t low_value = Low32Bits(value);
7603 int32_t high_value = High32Bits(value);
7604 Immediate low(low_value);
7605 Immediate high(high_value);
7606 Register first_low = first.AsRegisterPairLow<Register>();
7607 Register first_high = first.AsRegisterPairHigh<Register>();
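      // Fold away no-op halves of the constant: AND with all ones and OR/XOR with zero
      // need no instruction, and AND with zero is emitted as a register clear.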
7608 if (instruction->IsAnd()) {
7609 if (low_value == 0) {
7610 __ xorl(first_low, first_low);
7611 } else if (low_value != -1) {
7612 __ andl(first_low, low);
7613 }
7614 if (high_value == 0) {
7615 __ xorl(first_high, first_high);
7616 } else if (high_value != -1) {
7617 __ andl(first_high, high);
7618 }
7619 } else if (instruction->IsOr()) {
7620 if (low_value != 0) {
7621 __ orl(first_low, low);
7622 }
7623 if (high_value != 0) {
7624 __ orl(first_high, high);
7625 }
7626 } else {
7627 DCHECK(instruction->IsXor());
7628 if (low_value != 0) {
7629 __ xorl(first_low, low);
7630 }
7631 if (high_value != 0) {
7632 __ xorl(first_high, high);
7633 }
7634 }
7635 }
7636 }
7637 }
7638
7639 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7640 HInstruction* instruction,
7641 Location out,
7642 uint32_t offset,
7643 Location maybe_temp,
7644 ReadBarrierOption read_barrier_option) {
7645 Register out_reg = out.AsRegister<Register>();
7646 if (read_barrier_option == kWithReadBarrier) {
7647 CHECK(kEmitCompilerReadBarrier);
7648 if (kUseBakerReadBarrier) {
7649 // Load with fast path based Baker's read barrier.
7650 // /* HeapReference<Object> */ out = *(out + offset)
7651 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7652 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7653 } else {
7654 // Load with slow path based read barrier.
7655 // Save the value of `out` into `maybe_temp` before overwriting it
7656 // in the following move operation, as we will need it for the
7657 // read barrier below.
7658 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7659 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7660 // /* HeapReference<Object> */ out = *(out + offset)
7661 __ movl(out_reg, Address(out_reg, offset));
7662 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7663 }
7664 } else {
7665 // Plain load with no read barrier.
7666 // /* HeapReference<Object> */ out = *(out + offset)
7667 __ movl(out_reg, Address(out_reg, offset));
7668 __ MaybeUnpoisonHeapReference(out_reg);
7669 }
7670 }
7671
7672 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7673 HInstruction* instruction,
7674 Location out,
7675 Location obj,
7676 uint32_t offset,
7677 ReadBarrierOption read_barrier_option) {
7678 Register out_reg = out.AsRegister<Register>();
7679 Register obj_reg = obj.AsRegister<Register>();
7680 if (read_barrier_option == kWithReadBarrier) {
7681 CHECK(kEmitCompilerReadBarrier);
7682 if (kUseBakerReadBarrier) {
7683 // Load with fast path based Baker's read barrier.
7684 // /* HeapReference<Object> */ out = *(obj + offset)
7685 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7686 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7687 } else {
7688 // Load with slow path based read barrier.
7689 // /* HeapReference<Object> */ out = *(obj + offset)
7690 __ movl(out_reg, Address(obj_reg, offset));
7691 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7692 }
7693 } else {
7694 // Plain load with no read barrier.
7695 // /* HeapReference<Object> */ out = *(obj + offset)
7696 __ movl(out_reg, Address(obj_reg, offset));
7697 __ MaybeUnpoisonHeapReference(out_reg);
7698 }
7699 }
7700
7701 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7702 HInstruction* instruction,
7703 Location root,
7704 const Address& address,
7705 Label* fixup_label,
7706 ReadBarrierOption read_barrier_option) {
7707 Register root_reg = root.AsRegister<Register>();
7708 if (read_barrier_option == kWithReadBarrier) {
7709 DCHECK(kEmitCompilerReadBarrier);
7710 if (kUseBakerReadBarrier) {
7711 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7712       // Baker's read barriers are used:
7713 //
7714 // root = obj.field;
7715 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7716 // if (temp != null) {
7717 // root = temp(root)
7718 // }
7719
7720 // /* GcRoot<mirror::Object> */ root = *address
7721 __ movl(root_reg, address);
7722 if (fixup_label != nullptr) {
7723 __ Bind(fixup_label);
7724 }
7725 static_assert(
7726 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7727 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7728 "have different sizes.");
7729 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7730 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7731 "have different sizes.");
7732
7733 // Slow path marking the GC root `root`.
7734 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7735 instruction, root, /* unpoison_ref_before_marking= */ false);
7736 codegen_->AddSlowPath(slow_path);
7737
7738 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7739 const int32_t entry_point_offset =
7740 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7741 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
7742 // The entrypoint is null when the GC is not marking.
7743 __ j(kNotEqual, slow_path->GetEntryLabel());
7744 __ Bind(slow_path->GetExitLabel());
7745 } else {
7746 // GC root loaded through a slow path for read barriers other
7747 // than Baker's.
7748 // /* GcRoot<mirror::Object>* */ root = address
7749 __ leal(root_reg, address);
7750 if (fixup_label != nullptr) {
7751 __ Bind(fixup_label);
7752 }
7753 // /* mirror::Object* */ root = root->Read()
7754 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7755 }
7756 } else {
7757 // Plain GC root load with no read barrier.
7758 // /* GcRoot<mirror::Object> */ root = *address
7759 __ movl(root_reg, address);
7760 if (fixup_label != nullptr) {
7761 __ Bind(fixup_label);
7762 }
7763 // Note that GC roots are not affected by heap poisoning, thus we
7764 // do not have to unpoison `root_reg` here.
7765 }
7766 }
7767
7768 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7769 Location ref,
7770 Register obj,
7771 uint32_t offset,
7772 bool needs_null_check) {
7773 DCHECK(kEmitCompilerReadBarrier);
7774 DCHECK(kUseBakerReadBarrier);
7775
7776 // /* HeapReference<Object> */ ref = *(obj + offset)
7777 Address src(obj, offset);
7778 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7779 }
7780
7781 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7782 Location ref,
7783 Register obj,
7784 uint32_t data_offset,
7785 Location index,
7786 bool needs_null_check) {
7787 DCHECK(kEmitCompilerReadBarrier);
7788 DCHECK(kUseBakerReadBarrier);
7789
7790 static_assert(
7791 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7792 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7793 // /* HeapReference<Object> */ ref =
7794 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7795 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
7796 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7797 }
7798
7799 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7800 Location ref,
7801 Register obj,
7802 const Address& src,
7803 bool needs_null_check,
7804 bool always_update_field,
7805 Register* temp) {
7806 DCHECK(kEmitCompilerReadBarrier);
7807 DCHECK(kUseBakerReadBarrier);
7808
7809 // In slow path based read barriers, the read barrier call is
7810 // inserted after the original load. However, in fast path based
7811 // Baker's read barriers, we need to perform the load of
7812 // mirror::Object::monitor_ *before* the original reference load.
7813 // This load-load ordering is required by the read barrier.
7814 // The fast path/slow path (for Baker's algorithm) should look like:
7815 //
7816 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7817 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7818 // HeapReference<Object> ref = *src; // Original reference load.
7819 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7820 // if (is_gray) {
7821 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7822 // }
7823 //
7824 // Note: the original implementation in ReadBarrier::Barrier is
7825 // slightly more complex as:
7826 // - it implements the load-load fence using a data dependency on
7827 // the high-bits of rb_state, which are expected to be all zeroes
7828 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
7829 // which is a no-op thanks to the x86 memory model);
7830 // - it performs additional checks that we do not do here for
7831 // performance reasons.
7832
7833 Register ref_reg = ref.AsRegister<Register>();
7834 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7835
7836 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7837 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7838 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7839 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7840 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7841 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
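  // `gray_byte_position` selects the byte of the 32-bit lock word that holds the read
  // barrier state, and `test_value` is the bit mask within that byte, so a single
  // `testb` below suffices to check for the gray state.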
7842
7843 // if (rb_state == ReadBarrier::GrayState())
7844 // ref = ReadBarrier::Mark(ref);
7845 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7846 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7847 if (needs_null_check) {
7848 MaybeRecordImplicitNullCheck(instruction);
7849 }
7850
7851 // Load fence to prevent load-load reordering.
7852 // Note that this is a no-op, thanks to the x86 memory model.
7853 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7854
7855 // The actual reference load.
7856 // /* HeapReference<Object> */ ref = *src
7857 __ movl(ref_reg, src); // Flags are unaffected.
7858
7859 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7860 // Slow path marking the object `ref` when it is gray.
7861 SlowPathCode* slow_path;
7862 if (always_update_field) {
7863 DCHECK(temp != nullptr);
7864 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
7865 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
7866 } else {
7867 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7868 instruction, ref, /* unpoison_ref_before_marking= */ true);
7869 }
7870 AddSlowPath(slow_path);
7871
7872 // We have done the "if" of the gray bit check above, now branch based on the flags.
7873 __ j(kNotZero, slow_path->GetEntryLabel());
7874
7875 // Object* ref = ref_addr->AsMirrorPtr()
7876 __ MaybeUnpoisonHeapReference(ref_reg);
7877
7878 __ Bind(slow_path->GetExitLabel());
7879 }
7880
7881 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
7882 Location out,
7883 Location ref,
7884 Location obj,
7885 uint32_t offset,
7886 Location index) {
7887 DCHECK(kEmitCompilerReadBarrier);
7888
7889 // Insert a slow path based read barrier *after* the reference load.
7890 //
7891 // If heap poisoning is enabled, the unpoisoning of the loaded
7892 // reference will be carried out by the runtime within the slow
7893 // path.
7894 //
7895 // Note that `ref` currently does not get unpoisoned (when heap
7896 // poisoning is enabled), which is alright as the `ref` argument is
7897 // not used by the artReadBarrierSlow entry point.
7898 //
7899 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7900 SlowPathCode* slow_path = new (GetScopedAllocator())
7901 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
7902 AddSlowPath(slow_path);
7903
7904 __ jmp(slow_path->GetEntryLabel());
7905 __ Bind(slow_path->GetExitLabel());
7906 }
7907
7908 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7909 Location out,
7910 Location ref,
7911 Location obj,
7912 uint32_t offset,
7913 Location index) {
7914 if (kEmitCompilerReadBarrier) {
7915 // Baker's read barriers shall be handled by the fast path
7916 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
7917 DCHECK(!kUseBakerReadBarrier);
7918 // If heap poisoning is enabled, unpoisoning will be taken care of
7919 // by the runtime within the slow path.
7920 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7921 } else if (kPoisonHeapReferences) {
7922 __ UnpoisonHeapReference(out.AsRegister<Register>());
7923 }
7924 }
7925
7926 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7927 Location out,
7928 Location root) {
7929 DCHECK(kEmitCompilerReadBarrier);
7930
7931 // Insert a slow path based read barrier *after* the GC root load.
7932 //
7933 // Note that GC roots are not affected by heap poisoning, so we do
7934 // not need to do anything special for this here.
7935 SlowPathCode* slow_path =
7936 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
7937 AddSlowPath(slow_path);
7938
7939 __ jmp(slow_path->GetEntryLabel());
7940 __ Bind(slow_path->GetExitLabel());
7941 }
7942
7943 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7944 // Nothing to do, this should be removed during prepare for register allocator.
7945 LOG(FATAL) << "Unreachable";
7946 }
7947
7948 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7949 // Nothing to do, this should be removed during prepare for register allocator.
7950 LOG(FATAL) << "Unreachable";
7951 }
7952
7953 // Simple implementation of packed switch - generate cascaded compare/jumps.
7954 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7955 LocationSummary* locations =
7956 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7957 locations->SetInAt(0, Location::RequiresRegister());
7958 }
7959
7960 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
7961 int32_t lower_bound,
7962 uint32_t num_entries,
7963 HBasicBlock* switch_block,
7964 HBasicBlock* default_block) {
7965 // Figure out the correct compare values and jump conditions.
7966 // Handle the first compare/branch as a special case because it might
7967 // jump to the default case.
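  // Illustrative example (hypothetical values): with lower_bound == 10 and
  // num_entries == 3 this emits roughly:
  //   cmpl $10, value ; jl default ; je case0
  //   cmpl $12, value ; jl case1   ; je case2
  //   jmp default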
7968 DCHECK_GT(num_entries, 2u);
7969 Condition first_condition;
7970 uint32_t index;
7971 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
7972 if (lower_bound != 0) {
7973 first_condition = kLess;
7974 __ cmpl(value_reg, Immediate(lower_bound));
7975 __ j(first_condition, codegen_->GetLabelOf(default_block));
7976 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7977
7978 index = 1;
7979 } else {
7980 // Handle all the compare/jumps below.
7981 first_condition = kBelow;
7982 index = 0;
7983 }
7984
7985 // Handle the rest of the compare/jumps.
7986 for (; index + 1 < num_entries; index += 2) {
7987 int32_t compare_to_value = lower_bound + index + 1;
7988 __ cmpl(value_reg, Immediate(compare_to_value));
7989     // Jump to successors[index] if value < case_value[index + 1] (i.e. value == case_value[index]).
7990 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7991 // Jump to successors[index + 1] if value == case_value[index + 1].
7992 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7993 }
7994
7995 if (index != num_entries) {
7996 // There are an odd number of entries. Handle the last one.
7997 DCHECK_EQ(index + 1, num_entries);
7998 __ cmpl(value_reg, Immediate(lower_bound + index));
7999 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8000 }
8001
8002 // And the default for any other value.
8003 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8004 __ jmp(codegen_->GetLabelOf(default_block));
8005 }
8006 }
8007
8008 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8009 int32_t lower_bound = switch_instr->GetStartValue();
8010 uint32_t num_entries = switch_instr->GetNumEntries();
8011 LocationSummary* locations = switch_instr->GetLocations();
8012 Register value_reg = locations->InAt(0).AsRegister<Register>();
8013
8014 GenPackedSwitchWithCompares(value_reg,
8015 lower_bound,
8016 num_entries,
8017 switch_instr->GetBlock(),
8018 switch_instr->GetDefaultBlock());
8019 }
8020
8021 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8022 LocationSummary* locations =
8023 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8024 locations->SetInAt(0, Location::RequiresRegister());
8025
8026 // Constant area pointer.
8027 locations->SetInAt(1, Location::RequiresRegister());
8028
8029 // And the temporary we need.
8030 locations->AddTemp(Location::RequiresRegister());
8031 }
8032
8033 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8034 int32_t lower_bound = switch_instr->GetStartValue();
8035 uint32_t num_entries = switch_instr->GetNumEntries();
8036 LocationSummary* locations = switch_instr->GetLocations();
8037 Register value_reg = locations->InAt(0).AsRegister<Register>();
8038 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8039
8040 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8041 GenPackedSwitchWithCompares(value_reg,
8042 lower_bound,
8043 num_entries,
8044 switch_instr->GetBlock(),
8045 default_block);
8046 return;
8047 }
8048
8049   // For larger switches, use a jump table stored in the constant area.
8050 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8051 Register constant_area = locations->InAt(1).AsRegister<Register>();
8052
8053 // Remove the bias, if needed.
8054 if (lower_bound != 0) {
8055 __ leal(temp_reg, Address(value_reg, -lower_bound));
8056 value_reg = temp_reg;
8057 }
8058
8059 // Is the value in range?
8060 DCHECK_GE(num_entries, 1u);
8061 __ cmpl(value_reg, Immediate(num_entries - 1));
8062 __ j(kAbove, codegen_->GetLabelOf(default_block));
8063
8064 // We are in the range of the table.
8065 // Load (target-constant_area) from the jump table, indexing by the value.
8066 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8067
8068 // Compute the actual target address by adding in constant_area.
8069 __ addl(temp_reg, constant_area);
8070
8071 // And jump.
8072 __ jmp(temp_reg);
8073 }
8074
8075 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8076 HX86ComputeBaseMethodAddress* insn) {
8077 LocationSummary* locations =
8078 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8079 locations->SetOut(Location::RequiresRegister());
8080 }
8081
8082 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8083 HX86ComputeBaseMethodAddress* insn) {
8084 LocationSummary* locations = insn->GetLocations();
8085 Register reg = locations->Out().AsRegister<Register>();
8086
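  // Classic 32-bit x86 position-independent idiom: EIP cannot be read directly, so call
  // the next instruction and pop the pushed return address into `reg`.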
8087 // Generate call to next instruction.
8088 Label next_instruction;
8089 __ call(&next_instruction);
8090 __ Bind(&next_instruction);
8091
8092 // Remember this offset for later use with constant area.
8093 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8094
8095 // Grab the return address off the stack.
8096 __ popl(reg);
8097 }
8098
8099 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8100 HX86LoadFromConstantTable* insn) {
8101 LocationSummary* locations =
8102 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8103
8104 locations->SetInAt(0, Location::RequiresRegister());
8105 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8106
8107   // If the value does not need to be materialized, we only need the inputs to be set.
8108 if (insn->IsEmittedAtUseSite()) {
8109 return;
8110 }
8111
8112 switch (insn->GetType()) {
8113 case DataType::Type::kFloat32:
8114 case DataType::Type::kFloat64:
8115 locations->SetOut(Location::RequiresFpuRegister());
8116 break;
8117
8118 case DataType::Type::kInt32:
8119 locations->SetOut(Location::RequiresRegister());
8120 break;
8121
8122 default:
8123 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8124 }
8125 }

void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = insn->GetLocations();
  Location out = locations->Out();
  Register const_area = locations->InAt(0).AsRegister<Register>();
  HConstant* value = insn->GetConstant();

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
      __ movss(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralFloatAddress(
                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kFloat64:
      __ movsd(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralDoubleAddress(
                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kInt32:
      __ movl(out.AsRegister<Register>(),
              codegen_->LiteralInt32Address(
                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86& codegen,
           HX86ComputeBaseMethodAddress* base_method_address,
           size_t offset)
      : codegen_(&codegen),
        base_method_address_(base_method_address),
        offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86* codegen_;
  HX86ComputeBaseMethodAddress* base_method_address_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. The place to patch is the
    // last 4 bytes of the instruction.
    // The value to patch is the distance of the offset in the constant area
    // from the address computed by the HX86ComputeBaseMethodAddress instruction.
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position =
        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  int32_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
        switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // The label values in the jump table are computed relative to the
    // instruction addressing the constant area.
    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);

    // Populate the jump table with the target offsets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // Each entry is the target block's code position relative to the base method address.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - relative_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HX86PackedSwitch* switch_instr_;
};

void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
    // byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86::LiteralDoubleAddress(double v,
                                               HX86ComputeBaseMethodAddress* method_base,
                                               Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralFloatAddress(float v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
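  // Zero is loaded with xorl, which has a shorter encoding than movl with a zero
  // immediate and is recognized by the hardware as a zeroing idiom.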
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
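  // Comparing against zero uses testl, which sets the flags the same way as cmpl with a
  // zero immediate but has a shorter encoding.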
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
  Register lhs_reg = lhs.AsRegister<Register>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
}

Address CodeGeneratorX86::ArrayAddress(Register obj,
                                       Location index,
                                       ScaleFactor scale,
                                       uint32_t data_offset) {
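  // For a constant index, fold the scaled index into the displacement; otherwise use a
  // scaled-index addressing mode with the index register.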
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<Register>(), scale, data_offset);
}

Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                           Register reg,
                                           Register value) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);

  // We want a scaled address, as we are extracting the correct offset from the table.
  return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
}

// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
  if (!target.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
  if (target.Equals(return_loc)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  // with the else branch.
  if (type == DataType::Type::kInt64) {
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  } else {
    // Let the parallel move resolver take care of all of this.
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc, target, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
                                       const uint8_t* roots_data,
                                       const PatchInfo<Label>& info,
                                       uint64_t index_in_table) const {
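  // Locate the 32-bit literal that the patched instruction uses to reference the root and
  // overwrite it with the address of the root's entry in the JIT roots table.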
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                   ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                           ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

#undef __

}  // namespace x86
}  // namespace art