1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "gc/space/image_space.h"
28 #include "heap_poisoning.h"
29 #include "interpreter/mterp/nterp.h"
30 #include "intrinsics.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/var_handle.h"
39 #include "scoped_thread_state_change-inl.h"
40 #include "thread.h"
41 #include "utils/assembler.h"
42 #include "utils/stack_checks.h"
43 #include "utils/x86/assembler_x86.h"
44 #include "utils/x86/managed_register_x86.h"
45
46 namespace art {
47
48 template<class MirrorType>
49 class GcRoot;
50
51 namespace x86 {
52
53 static constexpr int kCurrentMethodStackOffset = 0;
54 static constexpr Register kMethodRegisterArgument = EAX;
55 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
56
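// 0x400 selects the C2 condition flag (bit 10) of the x87 FPU status word; it is checked
// when emitting fprem-style remainder loops to see whether another iteration is needed.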
57 static constexpr int kC2ConditionMask = 0x400;
58
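// Register(8) is one past the last real general-purpose register (EAX..EDI are 0..7); it only
// reserves a slot for the return address in the core spill mask (see the constructor below).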
59 static constexpr int kFakeReturnRegister = Register(8);
60
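// Canonical quiet-NaN bit patterns for double and float.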
61 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
62 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
63
64 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
65 InvokeRuntimeCallingConvention calling_convention;
66 RegisterSet caller_saves = RegisterSet::Empty();
67 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
68 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
69 // that the kPrimNot result register is the same as the first argument register.
70 return caller_saves;
71 }
72
73 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
74 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
75 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
76
77 class NullCheckSlowPathX86 : public SlowPathCode {
78 public:
79 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
80
81 void EmitNativeCode(CodeGenerator* codegen) override {
82 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
83 __ Bind(GetEntryLabel());
84 if (instruction_->CanThrowIntoCatchBlock()) {
85 // Live registers will be restored in the catch block if caught.
86 SaveLiveRegisters(codegen, instruction_->GetLocations());
87 }
88 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
89 instruction_,
90 instruction_->GetDexPc(),
91 this);
92 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
93 }
94
95 bool IsFatal() const override { return true; }
96
97 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
98
99 private:
100 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
101 };
102
103 class DivZeroCheckSlowPathX86 : public SlowPathCode {
104 public:
105 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
106
107 void EmitNativeCode(CodeGenerator* codegen) override {
108 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
109 __ Bind(GetEntryLabel());
110 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
111 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
112 }
113
114 bool IsFatal() const override { return true; }
115
116 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
117
118 private:
119 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
120 };
121
122 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
123 public:
124 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
125 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
126
127 void EmitNativeCode(CodeGenerator* codegen) override {
128 __ Bind(GetEntryLabel());
129 if (is_div_) {
130 __ negl(reg_);
131 } else {
132 __ movl(reg_, Immediate(0));
133 }
134 __ jmp(GetExitLabel());
135 }
136
137 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
138
139 private:
140 Register reg_;
141 bool is_div_;
142 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
143 };
144
145 class BoundsCheckSlowPathX86 : public SlowPathCode {
146 public:
147 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
148
149 void EmitNativeCode(CodeGenerator* codegen) override {
150 LocationSummary* locations = instruction_->GetLocations();
151 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
152 __ Bind(GetEntryLabel());
153 if (instruction_->CanThrowIntoCatchBlock()) {
154 // Live registers will be restored in the catch block if caught.
155 SaveLiveRegisters(codegen, locations);
156 }
157
158 Location index_loc = locations->InAt(0);
159 Location length_loc = locations->InAt(1);
160 InvokeRuntimeCallingConvention calling_convention;
161 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
162 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
163
164 // Are we using an array length from memory?
165 if (!length_loc.IsValid()) {
166 DCHECK(instruction_->InputAt(1)->IsArrayLength());
167 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
168 DCHECK(array_length->IsEmittedAtUseSite());
169 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
170 Location array_loc = array_length->GetLocations()->InAt(0);
171 if (!index_loc.Equals(length_arg)) {
172 // The index is not clobbered by loading the length directly to `length_arg`.
173 __ movl(length_arg.AsRegister<Register>(),
174 Address(array_loc.AsRegister<Register>(), len_offset));
175 x86_codegen->Move32(index_arg, index_loc);
176 } else if (!array_loc.Equals(index_arg)) {
177 // The array reference is not clobbered by the index move.
178 x86_codegen->Move32(index_arg, index_loc);
179 __ movl(length_arg.AsRegister<Register>(),
180 Address(array_loc.AsRegister<Register>(), len_offset));
181 } else {
182 // We do not have a temporary we could use, so swap the registers using the
183 // parallel move resolver and replace the array with the length afterwards.
184 codegen->EmitParallelMoves(
185 index_loc,
186 index_arg,
187 DataType::Type::kInt32,
188 array_loc,
189 length_arg,
190 DataType::Type::kReference);
191 __ movl(length_arg.AsRegister<Register>(),
192 Address(length_arg.AsRegister<Register>(), len_offset));
193 }
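      // With string compression enabled, the String count field also encodes the compression
      // state in its low bit, so shift the raw value right by one to recover the length.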
194 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
195 __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
196 }
197 } else {
198 // We're moving two locations to locations that could overlap,
199 // so we need a parallel move resolver.
200 codegen->EmitParallelMoves(
201 index_loc,
202 index_arg,
203 DataType::Type::kInt32,
204 length_loc,
205 length_arg,
206 DataType::Type::kInt32);
207 }
208
209 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
210 ? kQuickThrowStringBounds
211 : kQuickThrowArrayBounds;
212 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
213 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
214 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
215 }
216
217 bool IsFatal() const override { return true; }
218
219 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
220
221 private:
222 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
223 };
224
225 class SuspendCheckSlowPathX86 : public SlowPathCode {
226 public:
227 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
228 : SlowPathCode(instruction), successor_(successor) {}
229
230 void EmitNativeCode(CodeGenerator* codegen) override {
231 LocationSummary* locations = instruction_->GetLocations();
232 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
233 __ Bind(GetEntryLabel());
234 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
235 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
236 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
237 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
238 if (successor_ == nullptr) {
239 __ jmp(GetReturnLabel());
240 } else {
241 __ jmp(x86_codegen->GetLabelOf(successor_));
242 }
243 }
244
245 Label* GetReturnLabel() {
246 DCHECK(successor_ == nullptr);
247 return &return_label_;
248 }
249
250 HBasicBlock* GetSuccessor() const {
251 return successor_;
252 }
253
254 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
255
256 private:
257 HBasicBlock* const successor_;
258 Label return_label_;
259
260 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
261 };
262
263 class LoadStringSlowPathX86 : public SlowPathCode {
264 public:
265 explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}
266
267 void EmitNativeCode(CodeGenerator* codegen) override {
268 LocationSummary* locations = instruction_->GetLocations();
269 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
270
271 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
272 __ Bind(GetEntryLabel());
273 SaveLiveRegisters(codegen, locations);
274
275 InvokeRuntimeCallingConvention calling_convention;
276 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
277 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
278 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
279 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
280 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
281 RestoreLiveRegisters(codegen, locations);
282
283 __ jmp(GetExitLabel());
284 }
285
286 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
287
288 private:
289 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
290 };
291
292 class LoadClassSlowPathX86 : public SlowPathCode {
293 public:
294 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
295 : SlowPathCode(at), cls_(cls) {
296 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
297 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
298 }
299
300 void EmitNativeCode(CodeGenerator* codegen) override {
301 LocationSummary* locations = instruction_->GetLocations();
302 Location out = locations->Out();
303 const uint32_t dex_pc = instruction_->GetDexPc();
304 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
305 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
306
307 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
308 __ Bind(GetEntryLabel());
309 SaveLiveRegisters(codegen, locations);
310
311 InvokeRuntimeCallingConvention calling_convention;
312 if (must_resolve_type) {
313 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
314 x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
315 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
316 &cls_->GetDexFile()));
317 dex::TypeIndex type_index = cls_->GetTypeIndex();
318 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
319 if (cls_->NeedsAccessCheck()) {
320 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
321 x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
322 } else {
323 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
324 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
325 }
326 // If we also must_do_clinit, the resolved type is now in the correct register.
327 } else {
328 DCHECK(must_do_clinit);
329 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
330 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
331 }
332 if (must_do_clinit) {
333 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
334 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
335 }
336
337 // Move the class to the desired location.
338 if (out.IsValid()) {
339 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
340 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
341 }
342 RestoreLiveRegisters(codegen, locations);
343 __ jmp(GetExitLabel());
344 }
345
346 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
347
348 private:
349 // The class this slow path will load.
350 HLoadClass* const cls_;
351
352 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
353 };
354
355 class TypeCheckSlowPathX86 : public SlowPathCode {
356 public:
357 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
358 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
359
360 void EmitNativeCode(CodeGenerator* codegen) override {
361 LocationSummary* locations = instruction_->GetLocations();
362 DCHECK(instruction_->IsCheckCast()
363 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
364
365 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
366 __ Bind(GetEntryLabel());
367
368 if (kPoisonHeapReferences &&
369 instruction_->IsCheckCast() &&
370 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
371 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
372 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
373 }
374
375 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
376 SaveLiveRegisters(codegen, locations);
377 }
378
379 // We're moving two locations to locations that could overlap, so we need a parallel
380 // move resolver.
381 InvokeRuntimeCallingConvention calling_convention;
382 x86_codegen->EmitParallelMoves(locations->InAt(0),
383 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
384 DataType::Type::kReference,
385 locations->InAt(1),
386 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
387 DataType::Type::kReference);
388 if (instruction_->IsInstanceOf()) {
389 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
390 instruction_,
391 instruction_->GetDexPc(),
392 this);
393 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
394 } else {
395 DCHECK(instruction_->IsCheckCast());
396 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
397 instruction_,
398 instruction_->GetDexPc(),
399 this);
400 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
401 }
402
403 if (!is_fatal_) {
404 if (instruction_->IsInstanceOf()) {
405 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
406 }
407 RestoreLiveRegisters(codegen, locations);
408
409 __ jmp(GetExitLabel());
410 }
411 }
412
413 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
414 bool IsFatal() const override { return is_fatal_; }
415
416 private:
417 const bool is_fatal_;
418
419 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
420 };
421
422 class DeoptimizationSlowPathX86 : public SlowPathCode {
423 public:
424 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
425 : SlowPathCode(instruction) {}
426
427 void EmitNativeCode(CodeGenerator* codegen) override {
428 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
429 __ Bind(GetEntryLabel());
430 LocationSummary* locations = instruction_->GetLocations();
431 SaveLiveRegisters(codegen, locations);
432 InvokeRuntimeCallingConvention calling_convention;
433 x86_codegen->Load32BitValue(
434 calling_convention.GetRegisterAt(0),
435 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
436 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
437 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
438 }
439
440 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
441
442 private:
443 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
444 };
445
446 class ArraySetSlowPathX86 : public SlowPathCode {
447 public:
448 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
449
450 void EmitNativeCode(CodeGenerator* codegen) override {
451 LocationSummary* locations = instruction_->GetLocations();
452 __ Bind(GetEntryLabel());
453 SaveLiveRegisters(codegen, locations);
454
455 InvokeRuntimeCallingConvention calling_convention;
456 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
457 parallel_move.AddMove(
458 locations->InAt(0),
459 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
460 DataType::Type::kReference,
461 nullptr);
462 parallel_move.AddMove(
463 locations->InAt(1),
464 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
465 DataType::Type::kInt32,
466 nullptr);
467 parallel_move.AddMove(
468 locations->InAt(2),
469 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
470 DataType::Type::kReference,
471 nullptr);
472 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
473
474 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
475 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
476 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
477 RestoreLiveRegisters(codegen, locations);
478 __ jmp(GetExitLabel());
479 }
480
481 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
482
483 private:
484 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
485 };
486
487 // Slow path marking an object reference `ref` during a read
488 // barrier. The field `obj.field` in the object `obj` holding this
489 // reference does not get updated by this slow path after marking (see
490 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
491 //
492 // This means that after the execution of this slow path, `ref` will
493 // always be up-to-date, but `obj.field` may not; i.e., after the
494 // flip, `ref` will be a to-space reference, but `obj.field` will
495 // probably still be a from-space reference (unless it gets updated by
496 // another thread, or if another thread installed another object
497 // reference (different from `ref`) in `obj.field`).
498 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
499 public:
500 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
501 Location ref,
502 bool unpoison_ref_before_marking)
503 : SlowPathCode(instruction),
504 ref_(ref),
505 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
506 DCHECK(kEmitCompilerReadBarrier);
507 }
508
509 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
510
511 void EmitNativeCode(CodeGenerator* codegen) override {
512 LocationSummary* locations = instruction_->GetLocations();
513 Register ref_reg = ref_.AsRegister<Register>();
514 DCHECK(locations->CanCall());
515 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
516 DCHECK(instruction_->IsInstanceFieldGet() ||
517 instruction_->IsPredicatedInstanceFieldGet() ||
518 instruction_->IsStaticFieldGet() ||
519 instruction_->IsArrayGet() ||
520 instruction_->IsArraySet() ||
521 instruction_->IsLoadClass() ||
522 instruction_->IsLoadString() ||
523 instruction_->IsInstanceOf() ||
524 instruction_->IsCheckCast() ||
525 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
526 << "Unexpected instruction in read barrier marking slow path: "
527 << instruction_->DebugName();
528
529 __ Bind(GetEntryLabel());
530 if (unpoison_ref_before_marking_) {
531 // Object* ref = ref_addr->AsMirrorPtr()
532 __ MaybeUnpoisonHeapReference(ref_reg);
533 }
534 // No need to save live registers; it's taken care of by the
535 // entrypoint. Also, there is no need to update the stack mask,
536 // as this runtime call will not trigger a garbage collection.
537 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
538 DCHECK_NE(ref_reg, ESP);
539 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
540 // "Compact" slow path, saving two moves.
541 //
542 // Instead of using the standard runtime calling convention (input
543 // and output in EAX):
544 //
545 // EAX <- ref
546 // EAX <- ReadBarrierMark(EAX)
547 // ref <- EAX
548 //
549 // we just use rX (the register containing `ref`) as input and output
550 // of a dedicated entrypoint:
551 //
552 // rX <- ReadBarrierMarkRegX(rX)
553 //
554 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
555 // This runtime call does not require a stack map.
556 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
557 __ jmp(GetExitLabel());
558 }
559
560 private:
561 // The location (register) of the marked object reference.
562 const Location ref_;
563 // Should the reference in `ref_` be unpoisoned prior to marking it?
564 const bool unpoison_ref_before_marking_;
565
566 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
567 };
568
569 // Slow path marking an object reference `ref` during a read barrier,
570 // and if needed, atomically updating the field `obj.field` in the
571 // object `obj` holding this reference after marking (contrary to
572 // ReadBarrierMarkSlowPathX86 above, which never tries to update
573 // `obj.field`).
574 //
575 // This means that after the execution of this slow path, both `ref`
576 // and `obj.field` will be up-to-date; i.e., after the flip, both will
577 // hold the same to-space reference (unless another thread installed
578 // another object reference (different from `ref`) in `obj.field`).
579 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
580 public:
581 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
582 Location ref,
583 Register obj,
584 const Address& field_addr,
585 bool unpoison_ref_before_marking,
586 Register temp)
587 : SlowPathCode(instruction),
588 ref_(ref),
589 obj_(obj),
590 field_addr_(field_addr),
591 unpoison_ref_before_marking_(unpoison_ref_before_marking),
592 temp_(temp) {
593 DCHECK(kEmitCompilerReadBarrier);
594 }
595
596 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
597
598 void EmitNativeCode(CodeGenerator* codegen) override {
599 LocationSummary* locations = instruction_->GetLocations();
600 Register ref_reg = ref_.AsRegister<Register>();
601 DCHECK(locations->CanCall());
602 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
603 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
604 << "Unexpected instruction in read barrier marking and field updating slow path: "
605 << instruction_->DebugName();
606 HInvoke* invoke = instruction_->AsInvoke();
607 DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
608
609 __ Bind(GetEntryLabel());
610 if (unpoison_ref_before_marking_) {
611 // Object* ref = ref_addr->AsMirrorPtr()
612 __ MaybeUnpoisonHeapReference(ref_reg);
613 }
614
615 // Save the old (unpoisoned) reference.
616 __ movl(temp_, ref_reg);
617
618 // No need to save live registers; it's taken care of by the
619 // entrypoint. Also, there is no need to update the stack mask,
620 // as this runtime call will not trigger a garbage collection.
621 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
622 DCHECK_NE(ref_reg, ESP);
623 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
624 // "Compact" slow path, saving two moves.
625 //
626 // Instead of using the standard runtime calling convention (input
627 // and output in EAX):
628 //
629 // EAX <- ref
630 // EAX <- ReadBarrierMark(EAX)
631 // ref <- EAX
632 //
633 // we just use rX (the register containing `ref`) as input and output
634 // of a dedicated entrypoint:
635 //
636 // rX <- ReadBarrierMarkRegX(rX)
637 //
638 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
639 // This runtime call does not require a stack map.
640 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
641
642 // If the new reference is different from the old reference,
643 // update the field in the holder (`*field_addr`).
644 //
645 // Note that this field could also hold a different object, if
646 // another thread had concurrently changed it. In that case, the
647 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
648 // operation below would abort the CAS, leaving the field as-is.
649 NearLabel done;
650 __ cmpl(temp_, ref_reg);
651 __ j(kEqual, &done);
652
653 // Update the holder's field atomically. This may fail if the
654 // mutator updates it before us, but it's OK. This is achieved
655 // using a strong compare-and-set (CAS) operation with relaxed
656 // memory synchronization ordering, where the expected value is
657 // the old reference and the desired value is the new reference.
658 // This operation is implemented with a 32-bit LOCK CMPXCHG
659 // instruction, which requires the expected value (the old
660 // reference) to be in EAX. Save EAX beforehand, and move the
661 // expected value (stored in `temp_`) into EAX.
662 __ pushl(EAX);
663 __ movl(EAX, temp_);
664
665 // Convenience aliases.
666 Register base = obj_;
667 Register expected = EAX;
668 Register value = ref_reg;
669
670 bool base_equals_value = (base == value);
671 if (kPoisonHeapReferences) {
672 if (base_equals_value) {
673 // If `base` and `value` are the same register location, move
674 // `value` to a temporary register. This way, poisoning
675 // `value` won't invalidate `base`.
676 value = temp_;
677 __ movl(value, base);
678 }
679
680 // Check that the register allocator did not assign the location
681 // of `expected` (EAX) to `value` nor to `base`, so that heap
682 // poisoning (when enabled) works as intended below.
683 // - If `value` were equal to `expected`, both references would
684 // be poisoned twice, meaning they would not be poisoned at
685 // all, as heap poisoning uses address negation.
686 // - If `base` were equal to `expected`, poisoning `expected`
687 // would invalidate `base`.
688 DCHECK_NE(value, expected);
689 DCHECK_NE(base, expected);
690
691 __ PoisonHeapReference(expected);
692 __ PoisonHeapReference(value);
693 }
694
695 __ LockCmpxchgl(field_addr_, value);
696
697 // If heap poisoning is enabled, we need to unpoison the values
698 // that were poisoned earlier.
699 if (kPoisonHeapReferences) {
700 if (base_equals_value) {
701 // `value` has been moved to a temporary register, no need
702 // to unpoison it.
703 } else {
704 __ UnpoisonHeapReference(value);
705 }
706 // No need to unpoison `expected` (EAX), as it will be overwritten below.
707 }
708
709 // Restore EAX.
710 __ popl(EAX);
711
712 __ Bind(&done);
713 __ jmp(GetExitLabel());
714 }
715
716 private:
717 // The location (register) of the marked object reference.
718 const Location ref_;
719 // The register containing the object holding the marked object reference field.
720 const Register obj_;
721 // The address of the marked reference field. The base of this address must be `obj_`.
722 const Address field_addr_;
723
724 // Should the reference in `ref_` be unpoisoned prior to marking it?
725 const bool unpoison_ref_before_marking_;
726
727 const Register temp_;
728
729 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
730 };
731
732 // Slow path generating a read barrier for a heap reference.
733 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
734 public:
735 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
736 Location out,
737 Location ref,
738 Location obj,
739 uint32_t offset,
740 Location index)
741 : SlowPathCode(instruction),
742 out_(out),
743 ref_(ref),
744 obj_(obj),
745 offset_(offset),
746 index_(index) {
747 DCHECK(kEmitCompilerReadBarrier);
748 // If `obj` is equal to `out` or `ref`, it means the initial object
749 // has been overwritten by (or after) the heap object reference load
750 // to be instrumented, e.g.:
751 //
752 // __ movl(out, Address(out, offset));
753 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
754 //
755 // In that case, we have lost the information about the original
756 // object, and the emitted read barrier cannot work properly.
757 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
758 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
759 }
760
761 void EmitNativeCode(CodeGenerator* codegen) override {
762 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
763 LocationSummary* locations = instruction_->GetLocations();
764 Register reg_out = out_.AsRegister<Register>();
765 DCHECK(locations->CanCall());
766 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
767 DCHECK(instruction_->IsInstanceFieldGet() ||
768 instruction_->IsPredicatedInstanceFieldGet() ||
769 instruction_->IsStaticFieldGet() ||
770 instruction_->IsArrayGet() ||
771 instruction_->IsInstanceOf() ||
772 instruction_->IsCheckCast() ||
773 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
774 << "Unexpected instruction in read barrier for heap reference slow path: "
775 << instruction_->DebugName();
776
777 __ Bind(GetEntryLabel());
778 SaveLiveRegisters(codegen, locations);
779
780 // We may have to change the index's value, but as `index_` is a
781 // constant member (like other "inputs" of this slow path),
782 // introduce a copy of it, `index`.
783 Location index = index_;
784 if (index_.IsValid()) {
785 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
786 if (instruction_->IsArrayGet()) {
787 // Compute the actual memory offset and store it in `index`.
788 Register index_reg = index_.AsRegister<Register>();
789 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
790 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
791 // We are about to change the value of `index_reg` (see the
792 // calls to art::x86::X86Assembler::shll and
793 // art::x86::X86Assembler::AddImmediate below), but it has
794 // not been saved by the previous call to
795 // art::SlowPathCode::SaveLiveRegisters, as it is a
796 // callee-save register --
797 // art::SlowPathCode::SaveLiveRegisters does not consider
798 // callee-save registers, as it has been designed with the
799 // assumption that callee-save registers are supposed to be
800 // handled by the called function. So, as a callee-save
801 // register, `index_reg` _would_ eventually be saved onto
802 // the stack, but it would be too late: we would have
803 // changed its value earlier. Therefore, we manually save
804 // it here into another freely available register,
805 // `free_reg`, chosen of course among the caller-save
806 // registers (as a callee-save `free_reg` register would
807 // exhibit the same problem).
808 //
809 // Note we could have requested a temporary register from
810 // the register allocator instead; but we prefer not to, as
811 // this is a slow path, and we know we can find a
812 // caller-save register that is available.
813 Register free_reg = FindAvailableCallerSaveRegister(codegen);
814 __ movl(free_reg, index_reg);
815 index_reg = free_reg;
816 index = Location::RegisterLocation(index_reg);
817 } else {
818 // The initial register stored in `index_` has already been
819 // saved in the call to art::SlowPathCode::SaveLiveRegisters
820 // (as it is not a callee-save register), so we can freely
821 // use it.
822 }
823 // Shifting the index value contained in `index_reg` by the scale
824 // factor (2) cannot overflow in practice, as the runtime is
825 // unable to allocate object arrays with a size larger than
826 // 2^26 - 1 (that is, 2^28 - 4 bytes).
827 __ shll(index_reg, Immediate(TIMES_4));
828 static_assert(
829 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
830 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
831 __ AddImmediate(index_reg, Immediate(offset_));
832 } else {
833 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
834 // intrinsics, `index_` is not shifted by a scale factor of 2
835 // (as in the case of ArrayGet), as it is actually an offset
836 // to an object field within an object.
837 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
838 DCHECK(instruction_->GetLocations()->Intrinsified());
839 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
840 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
841 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
842 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
843 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
844 << instruction_->AsInvoke()->GetIntrinsic();
845 DCHECK_EQ(offset_, 0U);
846 DCHECK(index_.IsRegisterPair());
847 // UnsafeGet's offset location is a register pair, the low
848 // part contains the correct offset.
849 index = index_.ToLow();
850 }
851 }
852
853 // We're moving two or three locations to locations that could
854 // overlap, so we need a parallel move resolver.
855 InvokeRuntimeCallingConvention calling_convention;
856 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
857 parallel_move.AddMove(ref_,
858 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
859 DataType::Type::kReference,
860 nullptr);
861 parallel_move.AddMove(obj_,
862 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
863 DataType::Type::kReference,
864 nullptr);
865 if (index.IsValid()) {
866 parallel_move.AddMove(index,
867 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
868 DataType::Type::kInt32,
869 nullptr);
870 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
871 } else {
872 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
873 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
874 }
875 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
876 CheckEntrypointTypes<
877 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
878 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
879
880 RestoreLiveRegisters(codegen, locations);
881 __ jmp(GetExitLabel());
882 }
883
884 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
885
886 private:
887 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
888 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
889 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
890 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
891 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
892 return static_cast<Register>(i);
893 }
894 }
895 // We shall never fail to find a free caller-save register, as
896 // there are more than two core caller-save registers on x86
897 // (meaning it is possible to find one which is different from
898 // `ref` and `obj`).
899 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
900 LOG(FATAL) << "Could not find a free caller-save register";
901 UNREACHABLE();
902 }
903
904 const Location out_;
905 const Location ref_;
906 const Location obj_;
907 const uint32_t offset_;
908 // An additional location containing an index to an array.
909 // Only used for HArrayGet and the UnsafeGetObject &
910 // UnsafeGetObjectVolatile intrinsics.
911 const Location index_;
912
913 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
914 };
915
916 // Slow path generating a read barrier for a GC root.
917 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
918 public:
919 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
920 : SlowPathCode(instruction), out_(out), root_(root) {
921 DCHECK(kEmitCompilerReadBarrier);
922 }
923
924 void EmitNativeCode(CodeGenerator* codegen) override {
925 LocationSummary* locations = instruction_->GetLocations();
926 Register reg_out = out_.AsRegister<Register>();
927 DCHECK(locations->CanCall());
928 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
929 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
930 << "Unexpected instruction in read barrier for GC root slow path: "
931 << instruction_->DebugName();
932
933 __ Bind(GetEntryLabel());
934 SaveLiveRegisters(codegen, locations);
935
936 InvokeRuntimeCallingConvention calling_convention;
937 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
938 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
939 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
940 instruction_,
941 instruction_->GetDexPc(),
942 this);
943 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
944 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
945
946 RestoreLiveRegisters(codegen, locations);
947 __ jmp(GetExitLabel());
948 }
949
950 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
951
952 private:
953 const Location out_;
954 const Location root_;
955
956 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
957 };
958
959 class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
960 public:
961 explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
962
963 void EmitNativeCode(CodeGenerator* codegen) override {
964 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
965 LocationSummary* locations = instruction_->GetLocations();
966 QuickEntrypointEnum entry_point =
967 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
968 __ Bind(GetEntryLabel());
969 SaveLiveRegisters(codegen, locations);
970 x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
971 RestoreLiveRegisters(codegen, locations);
972 __ jmp(GetExitLabel());
973 }
974
975 const char* GetDescription() const override {
976 return "MethodEntryExitHooksSlowPath";
977 }
978
979 private:
980 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
981 };
982
983 class CompileOptimizedSlowPathX86 : public SlowPathCode {
984 public:
985 CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {}
986
987 void EmitNativeCode(CodeGenerator* codegen) override {
988 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
989 __ Bind(GetEntryLabel());
990 x86_codegen->GenerateInvokeRuntime(
991 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
992 __ jmp(GetExitLabel());
993 }
994
995 const char* GetDescription() const override {
996 return "CompileOptimizedSlowPath";
997 }
998
999 private:
1000 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
1001 };
1002
1003 #undef __
1004 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1005 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
1006
1007 inline Condition X86Condition(IfCondition cond) {
1008 switch (cond) {
1009 case kCondEQ: return kEqual;
1010 case kCondNE: return kNotEqual;
1011 case kCondLT: return kLess;
1012 case kCondLE: return kLessEqual;
1013 case kCondGT: return kGreater;
1014 case kCondGE: return kGreaterEqual;
1015 case kCondB: return kBelow;
1016 case kCondBE: return kBelowEqual;
1017 case kCondA: return kAbove;
1018 case kCondAE: return kAboveEqual;
1019 }
1020 LOG(FATAL) << "Unreachable";
1021 UNREACHABLE();
1022 }
1023
1024 // Maps signed condition to unsigned condition and FP condition to x86 name.
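// FP comparisons are emitted with ucomiss/ucomisd, which set CF/ZF like an unsigned integer
// compare, so e.g. kCondLT on float inputs must be tested with kBelow rather than kLess.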
1025 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
1026 switch (cond) {
1027 case kCondEQ: return kEqual;
1028 case kCondNE: return kNotEqual;
1029 // Signed to unsigned, and FP to x86 name.
1030 case kCondLT: return kBelow;
1031 case kCondLE: return kBelowEqual;
1032 case kCondGT: return kAbove;
1033 case kCondGE: return kAboveEqual;
1034 // Unsigned remain unchanged.
1035 case kCondB: return kBelow;
1036 case kCondBE: return kBelowEqual;
1037 case kCondA: return kAbove;
1038 case kCondAE: return kAboveEqual;
1039 }
1040 LOG(FATAL) << "Unreachable";
1041 UNREACHABLE();
1042 }
1043
1044 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
1045 stream << Register(reg);
1046 }
1047
1048 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1049 stream << XmmRegister(reg);
1050 }
1051
1052 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
1053 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
1054 }
1055
1056 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1057 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
1058 return kX86WordSize;
1059 }
1060
1061 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1062 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1063 return kX86WordSize;
1064 }
1065
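// SIMD graphs may hold 128-bit vector values in XMM registers, so spills must use the full
// register width (movups); otherwise a 64-bit movsd covering the double value is enough.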
1066 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1067 if (GetGraph()->HasSIMD()) {
1068 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1069 } else {
1070 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1071 }
1072 return GetSlowPathFPWidth();
1073 }
1074
1075 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1076 if (GetGraph()->HasSIMD()) {
1077 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1078 } else {
1079 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1080 }
1081 return GetSlowPathFPWidth();
1082 }
1083
1084 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1085 HInstruction* instruction,
1086 uint32_t dex_pc,
1087 SlowPathCode* slow_path) {
1088 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1089 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1090 if (EntrypointRequiresStackMap(entrypoint)) {
1091 RecordPcInfo(instruction, dex_pc, slow_path);
1092 }
1093 }
1094
1095 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1096 HInstruction* instruction,
1097 SlowPathCode* slow_path) {
1098 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1099 GenerateInvokeRuntime(entry_point_offset);
1100 }
1101
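// Quick entrypoints are fields of the Thread object, which on x86 is reachable through the
// %fs segment register, so a runtime call is a single %fs-relative absolute call.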
1102 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1103 __ fs()->call(Address::Absolute(entry_point_offset));
1104 }
1105
1106 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1107 const CompilerOptions& compiler_options,
1108 OptimizingCompilerStats* stats)
1109 : CodeGenerator(graph,
1110 kNumberOfCpuRegisters,
1111 kNumberOfXmmRegisters,
1112 kNumberOfRegisterPairs,
1113 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1114 arraysize(kCoreCalleeSaves))
1115 | (1 << kFakeReturnRegister),
1116 0,
1117 compiler_options,
1118 stats),
1119 block_labels_(nullptr),
1120 location_builder_(graph, this),
1121 instruction_visitor_(graph, this),
1122 move_resolver_(graph->GetAllocator(), this),
1123 assembler_(graph->GetAllocator(),
1124 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
1125 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1126 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1127 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1128 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1129 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1130 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1131 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1132 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1133 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1134 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1135 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1136 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1137 constant_area_start_(-1),
1138 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1139 method_address_offset_(std::less<uint32_t>(),
1140 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1141 // Use a fake return address register to mimic Quick.
1142 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1143 }
1144
1145 void CodeGeneratorX86::SetupBlockedRegisters() const {
1146 // Stack register is always reserved.
1147 blocked_core_registers_[ESP] = true;
1148 }
1149
1150 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1151 : InstructionCodeGenerator(graph, codegen),
1152 assembler_(codegen->GetAssembler()),
1153 codegen_(codegen) {}
1154
1155 static dwarf::Reg DWARFReg(Register reg) {
1156 return dwarf::Reg::X86Core(static_cast<int>(reg));
1157 }
1158
1159 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1160 switch (ret->InputAt(0)->GetType()) {
1161 case DataType::Type::kReference:
1162 case DataType::Type::kBool:
1163 case DataType::Type::kUint8:
1164 case DataType::Type::kInt8:
1165 case DataType::Type::kUint16:
1166 case DataType::Type::kInt16:
1167 case DataType::Type::kInt32:
1168 locations->SetInAt(0, Location::RegisterLocation(EAX));
1169 break;
1170
1171 case DataType::Type::kInt64:
1172 locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1173 break;
1174
1175 case DataType::Type::kFloat32:
1176 case DataType::Type::kFloat64:
1177 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1178 break;
1179
1180 case DataType::Type::kVoid:
1181 locations->SetInAt(0, Location::NoLocation());
1182 break;
1183
1184 default:
1185 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1186 }
1187 }
1188
1189 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1190 LocationSummary* locations = new (GetGraph()->GetAllocator())
1191 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1192 SetInForReturnValue(method_hook, locations);
1193 }
1194
1195 void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
1196 SlowPathCode* slow_path =
1197 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
1198 codegen_->AddSlowPath(slow_path);
1199
1200 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1201 int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
1202 __ cmpb(Address::Absolute(address + offset), Immediate(0));
1203 __ j(kNotEqual, slow_path->GetEntryLabel());
1204 __ Bind(slow_path->GetExitLabel());
1205 }
1206
1207 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1208 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1209 DCHECK(codegen_->RequiresCurrentMethod());
1210 GenerateMethodEntryExitHook(instruction);
1211 }
1212
1213 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1214 new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1215 }
1216
1217 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1218 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1219 DCHECK(codegen_->RequiresCurrentMethod());
1220 GenerateMethodEntryExitHook(instruction);
1221 }
1222
1223 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1224 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1225 Register reg = EAX;
1226 if (is_frame_entry) {
1227 reg = kMethodRegisterArgument;
1228 } else {
1229 __ pushl(EAX);
1230 __ cfi().AdjustCFAOffset(4);
1231 __ movl(EAX, Address(ESP, kX86WordSize));
1232 }
1233 NearLabel overflow;
1234 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1235 Immediate(interpreter::kNterpHotnessValue));
1236 __ j(kEqual, &overflow);
1237 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1238 __ Bind(&overflow);
1239 if (!is_frame_entry) {
1240 __ popl(EAX);
1241 __ cfi().AdjustCFAOffset(-4);
1242 }
1243 }
1244
1245 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1246 SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86();
1247 AddSlowPath(slow_path);
1248 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1249 DCHECK(info != nullptr);
1250 uint32_t address = reinterpret_cast32<uint32_t>(info) +
1251 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1252 DCHECK(!HasEmptyFrame());
1253 // With multiple threads, this can overflow. This is OK, we will eventually get to see
1254 // it reaching 0. Also, at this point we have no register available to look
1255 // at the counter directly.
1256 __ addw(Address::Absolute(address), Immediate(-1));
1257 __ j(kEqual, slow_path->GetEntryLabel());
1258 __ Bind(slow_path->GetExitLabel());
1259 }
1260 }
1261
1262 void CodeGeneratorX86::GenerateFrameEntry() {
1263 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1264 __ Bind(&frame_entry_label_);
1265 bool skip_overflow_check =
1266 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1267 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1268
1269 if (!skip_overflow_check) {
1270 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
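    // Implicit stack overflow check: touch the stack `reserved_bytes` below ESP. If the stack
    // has overflowed, this read faults and the fault handler raises a StackOverflowError.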
1271 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1272 RecordPcInfo(nullptr, 0);
1273 }
1274
1275 if (!HasEmptyFrame()) {
1276 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1277 Register reg = kCoreCalleeSaves[i];
1278 if (allocated_registers_.ContainsCoreRegister(reg)) {
1279 __ pushl(reg);
1280 __ cfi().AdjustCFAOffset(kX86WordSize);
1281 __ cfi().RelOffset(DWARFReg(reg), 0);
1282 }
1283 }
1284
1285 int adjust = GetFrameSize() - FrameEntrySpillSize();
1286 IncreaseFrame(adjust);
1287 // Save the current method if we need it. Note that we do not
1288 // do this in HCurrentMethod, as the instruction might have been removed
1289 // in the SSA graph.
1290 if (RequiresCurrentMethod()) {
1291 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1292 }
1293
1294 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1295 // Initialize should_deoptimize flag to 0.
1296 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1297 }
1298 }
1299
1300 MaybeIncrementHotness(/* is_frame_entry= */ true);
1301 }
1302
1303 void CodeGeneratorX86::GenerateFrameExit() {
1304 __ cfi().RememberState();
1305 if (!HasEmptyFrame()) {
1306 int adjust = GetFrameSize() - FrameEntrySpillSize();
1307 DecreaseFrame(adjust);
1308
1309 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1310 Register reg = kCoreCalleeSaves[i];
1311 if (allocated_registers_.ContainsCoreRegister(reg)) {
1312 __ popl(reg);
1313 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1314 __ cfi().Restore(DWARFReg(reg));
1315 }
1316 }
1317 }
1318 __ ret();
1319 __ cfi().RestoreState();
1320 __ cfi().DefCFAOffset(GetFrameSize());
1321 }
1322
1323 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1324 __ Bind(GetLabelOf(block));
1325 }
1326
1327 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1328 switch (type) {
1329 case DataType::Type::kReference:
1330 case DataType::Type::kBool:
1331 case DataType::Type::kUint8:
1332 case DataType::Type::kInt8:
1333 case DataType::Type::kUint16:
1334 case DataType::Type::kInt16:
1335 case DataType::Type::kUint32:
1336 case DataType::Type::kInt32:
1337 return Location::RegisterLocation(EAX);
1338
1339 case DataType::Type::kUint64:
1340 case DataType::Type::kInt64:
1341 return Location::RegisterPairLocation(EAX, EDX);
1342
1343 case DataType::Type::kVoid:
1344 return Location::NoLocation();
1345
1346 case DataType::Type::kFloat64:
1347 case DataType::Type::kFloat32:
1348 return Location::FpuRegisterLocation(XMM0);
1349 }
1350
1351 UNREACHABLE();
1352 }
1353
1354 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1355 return Location::RegisterLocation(kMethodRegisterArgument);
1356 }
1357
1358 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1359 switch (type) {
1360 case DataType::Type::kReference:
1361 case DataType::Type::kBool:
1362 case DataType::Type::kUint8:
1363 case DataType::Type::kInt8:
1364 case DataType::Type::kUint16:
1365 case DataType::Type::kInt16:
1366 case DataType::Type::kInt32: {
1367 uint32_t index = gp_index_++;
1368 stack_index_++;
1369 if (index < calling_convention.GetNumberOfRegisters()) {
1370 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1371 } else {
1372 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1373 }
1374 }
1375
1376 case DataType::Type::kInt64: {
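// A long occupies two GPR argument slots. It is passed in a register pair only if both
// registers are still available; otherwise the whole value is passed on the stack (it
// is never split between a register and a stack slot).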
1377 uint32_t index = gp_index_;
1378 gp_index_ += 2;
1379 stack_index_ += 2;
1380 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1381 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1382 calling_convention.GetRegisterPairAt(index));
1383 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1384 } else {
1385 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1386 }
1387 }
1388
1389 case DataType::Type::kFloat32: {
1390 uint32_t index = float_index_++;
1391 stack_index_++;
1392 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1393 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1394 } else {
1395 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1396 }
1397 }
1398
1399 case DataType::Type::kFloat64: {
1400 uint32_t index = float_index_++;
1401 stack_index_ += 2;
1402 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1403 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1404 } else {
1405 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1406 }
1407 }
1408
1409 case DataType::Type::kUint32:
1410 case DataType::Type::kUint64:
1411 case DataType::Type::kVoid:
1412 LOG(FATAL) << "Unexpected parameter type " << type;
1413 UNREACHABLE();
1414 }
1415 return Location::NoLocation();
1416 }
1417
1418 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1419 DCHECK_NE(type, DataType::Type::kReference);
1420
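// @CriticalNative calls use the native x86 calling convention, so every argument is
// passed on the stack; only the running stack offset advances (one word, or two words
// for 64-bit values).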
1421 Location location;
1422 if (DataType::Is64BitType(type)) {
1423 location = Location::DoubleStackSlot(stack_offset_);
1424 stack_offset_ += 2 * kFramePointerSize;
1425 } else {
1426 location = Location::StackSlot(stack_offset_);
1427 stack_offset_ += kFramePointerSize;
1428 }
1429 if (for_register_allocation_) {
1430 location = Location::Any();
1431 }
1432 return location;
1433 }
1434
1435 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1436 // We perform conversion to the managed ABI return register after the call if needed.
1437 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1438 return dex_calling_convention.GetReturnLocation(type);
1439 }
1440
1441 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1442 // Pass the method in the hidden argument EAX.
1443 return Location::RegisterLocation(EAX);
1444 }
1445
1446 void CodeGeneratorX86::Move32(Location destination, Location source) {
1447 if (source.Equals(destination)) {
1448 return;
1449 }
1450 if (destination.IsRegister()) {
1451 if (source.IsRegister()) {
1452 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1453 } else if (source.IsFpuRegister()) {
1454 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1455 } else if (source.IsConstant()) {
1456 int32_t value = GetInt32ValueOf(source.GetConstant());
1457 __ movl(destination.AsRegister<Register>(), Immediate(value));
1458 } else {
1459 DCHECK(source.IsStackSlot());
1460 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1461 }
1462 } else if (destination.IsFpuRegister()) {
1463 if (source.IsRegister()) {
1464 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1465 } else if (source.IsFpuRegister()) {
1466 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1467 } else {
1468 DCHECK(source.IsStackSlot());
1469 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1470 }
1471 } else {
1472 DCHECK(destination.IsStackSlot()) << destination;
1473 if (source.IsRegister()) {
1474 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1475 } else if (source.IsFpuRegister()) {
1476 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1477 } else if (source.IsConstant()) {
1478 HConstant* constant = source.GetConstant();
1479 int32_t value = GetInt32ValueOf(constant);
1480 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1481 } else {
1482 DCHECK(source.IsStackSlot());
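// Memory-to-memory move with no scratch register available: push the source slot and
// pop it straight into the destination slot.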
1483 __ pushl(Address(ESP, source.GetStackIndex()));
1484 __ popl(Address(ESP, destination.GetStackIndex()));
1485 }
1486 }
1487 }
1488
1489 void CodeGeneratorX86::Move64(Location destination, Location source) {
1490 if (source.Equals(destination)) {
1491 return;
1492 }
1493 if (destination.IsRegisterPair()) {
1494 if (source.IsRegisterPair()) {
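// The source and destination pairs may share registers, so route the two 32-bit moves
// through the parallel move resolver, which orders (or swaps) them to handle overlap.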
1495 EmitParallelMoves(
1496 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1497 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1498 DataType::Type::kInt32,
1499 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1500 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1501 DataType::Type::kInt32);
1502 } else if (source.IsFpuRegister()) {
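// Split the 64-bit XMM value into the register pair: movd copies the low word, then a
// logical right shift by 32 exposes the high word (this clobbers the source XMM).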
1503 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1504 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1505 __ psrlq(src_reg, Immediate(32));
1506 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1507 } else {
1508 // No conflict possible, so just do the moves.
1509 DCHECK(source.IsDoubleStackSlot());
1510 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1511 __ movl(destination.AsRegisterPairHigh<Register>(),
1512 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1513 }
1514 } else if (destination.IsFpuRegister()) {
1515 if (source.IsFpuRegister()) {
1516 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1517 } else if (source.IsDoubleStackSlot()) {
1518 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1519 } else if (source.IsRegisterPair()) {
1520 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1521 // Push the 2 source registers to the stack.
1522 __ pushl(source.AsRegisterPairHigh<Register>());
1523 __ cfi().AdjustCFAOffset(elem_size);
1524 __ pushl(source.AsRegisterPairLow<Register>());
1525 __ cfi().AdjustCFAOffset(elem_size);
1526 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1527 // And remove the temporary stack space we allocated.
1528 DecreaseFrame(2 * elem_size);
1529 } else {
1530 LOG(FATAL) << "Unimplemented";
1531 }
1532 } else {
1533 DCHECK(destination.IsDoubleStackSlot()) << destination;
1534 if (source.IsRegisterPair()) {
1535 // No conflict possible, so just do the moves.
1536 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1537 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1538 source.AsRegisterPairHigh<Register>());
1539 } else if (source.IsFpuRegister()) {
1540 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1541 } else if (source.IsConstant()) {
1542 HConstant* constant = source.GetConstant();
1543 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1544 int64_t value = GetInt64ValueOf(constant);
1545 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1546 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1547 Immediate(High32Bits(value)));
1548 } else {
1549 DCHECK(source.IsDoubleStackSlot()) << source;
1550 EmitParallelMoves(
1551 Location::StackSlot(source.GetStackIndex()),
1552 Location::StackSlot(destination.GetStackIndex()),
1553 DataType::Type::kInt32,
1554 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1555 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1556 DataType::Type::kInt32);
1557 }
1558 }
1559 }
1560
1561 static Address CreateAddress(Register base,
1562 Register index = Register::kNoRegister,
1563 ScaleFactor scale = TIMES_1,
1564 int32_t disp = 0) {
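// Register::kNoRegister cannot be encoded as an index, so fall back to the plain
// (base + displacement) addressing form when no index is given.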
1565 if (index == Register::kNoRegister) {
1566 return Address(base, disp);
1567 }
1568
1569 return Address(base, index, scale, disp);
1570 }
1571
1572 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1573 Location dst,
1574 Address src,
1575 HInstruction* instr,
1576 XmmRegister temp,
1577 bool is_atomic_load) {
1578 switch (dst_type) {
1579 case DataType::Type::kBool:
1580 case DataType::Type::kUint8:
1581 __ movzxb(dst.AsRegister<Register>(), src);
1582 break;
1583 case DataType::Type::kInt8:
1584 __ movsxb(dst.AsRegister<Register>(), src);
1585 break;
1586 case DataType::Type::kInt16:
1587 __ movsxw(dst.AsRegister<Register>(), src);
1588 break;
1589 case DataType::Type::kUint16:
1590 __ movzxw(dst.AsRegister<Register>(), src);
1591 break;
1592 case DataType::Type::kInt32:
1593 __ movl(dst.AsRegister<Register>(), src);
1594 break;
1595 case DataType::Type::kInt64: {
1596 if (is_atomic_load) {
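// To be atomic on x86-32, the 64-bit load must be a single memory access: load the
// value with one movsd into the XMM temp, then split it into the register pair.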
1597 __ movsd(temp, src);
1598 if (instr != nullptr) {
1599 MaybeRecordImplicitNullCheck(instr);
1600 }
1601 __ movd(dst.AsRegisterPairLow<Register>(), temp);
1602 __ psrlq(temp, Immediate(32));
1603 __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1604 } else {
1605 DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1606 Address src_high = Address::displace(src, kX86WordSize);
1607 __ movl(dst.AsRegisterPairLow<Register>(), src);
1608 if (instr != nullptr) {
1609 MaybeRecordImplicitNullCheck(instr);
1610 }
1611 __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1612 }
1613 break;
1614 }
1615 case DataType::Type::kFloat32:
1616 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1617 break;
1618 case DataType::Type::kFloat64:
1619 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1620 break;
1621 case DataType::Type::kReference:
1622 DCHECK(!kEmitCompilerReadBarrier);
1623 __ movl(dst.AsRegister<Register>(), src);
1624 __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1625 break;
1626 default:
1627 LOG(FATAL) << "Unreachable type " << dst_type;
1628 }
1629 if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1630 // kInt64 needs special handling that is done in the above switch.
1631 MaybeRecordImplicitNullCheck(instr);
1632 }
1633 }
1634
1635 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1636 Location src,
1637 Register dst_base,
1638 Register dst_index,
1639 ScaleFactor dst_scale,
1640 int32_t dst_disp) {
1641 DCHECK(dst_base != Register::kNoRegister);
1642 Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1643
1644 switch (src_type) {
1645 case DataType::Type::kBool:
1646 case DataType::Type::kUint8:
1647 case DataType::Type::kInt8: {
1648 if (src.IsConstant()) {
1649 __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1650 } else {
1651 __ movb(dst, src.AsRegister<ByteRegister>());
1652 }
1653 break;
1654 }
1655 case DataType::Type::kUint16:
1656 case DataType::Type::kInt16: {
1657 if (src.IsConstant()) {
1658 __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1659 } else {
1660 __ movw(dst, src.AsRegister<Register>());
1661 }
1662 break;
1663 }
1664 case DataType::Type::kUint32:
1665 case DataType::Type::kInt32: {
1666 if (src.IsConstant()) {
1667 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1668 __ movl(dst, Immediate(v));
1669 } else {
1670 __ movl(dst, src.AsRegister<Register>());
1671 }
1672 break;
1673 }
1674 case DataType::Type::kUint64:
1675 case DataType::Type::kInt64: {
1676 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1677 if (src.IsConstant()) {
1678 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1679 __ movl(dst, Immediate(Low32Bits(v)));
1680 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1681 } else {
1682 __ movl(dst, src.AsRegisterPairLow<Register>());
1683 __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1684 }
1685 break;
1686 }
1687 case DataType::Type::kFloat32: {
1688 if (src.IsConstant()) {
1689 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1690 __ movl(dst, Immediate(v));
1691 } else {
1692 __ movss(dst, src.AsFpuRegister<XmmRegister>());
1693 }
1694 break;
1695 }
1696 case DataType::Type::kFloat64: {
1697 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1698 if (src.IsConstant()) {
1699 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1700 __ movl(dst, Immediate(Low32Bits(v)));
1701 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1702 } else {
1703 __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1704 }
1705 break;
1706 }
1707 case DataType::Type::kVoid:
1708 case DataType::Type::kReference:
1709 LOG(FATAL) << "Unreachable type " << src_type;
1710 }
1711 }
1712
1713 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1714 DCHECK(location.IsRegister());
1715 __ movl(location.AsRegister<Register>(), Immediate(value));
1716 }
1717
1718 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1719 HParallelMove move(GetGraph()->GetAllocator());
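// Split a 64-bit move into two 32-bit moves so the parallel move resolver can schedule
// each half (and resolve any overlap) with the registers available on x86-32; constant
// and FP sources are handled as a single move in the else branch.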
1720 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1721 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1722 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1723 } else {
1724 move.AddMove(src, dst, dst_type, nullptr);
1725 }
1726 GetMoveResolver()->EmitNativeCode(&move);
1727 }
1728
1729 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1730 if (location.IsRegister()) {
1731 locations->AddTemp(location);
1732 } else if (location.IsRegisterPair()) {
1733 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1734 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1735 } else {
1736 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1737 }
1738 }
1739
1740 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1741 if (successor->IsExitBlock()) {
1742 DCHECK(got->GetPrevious()->AlwaysThrows());
1743 return; // no code needed
1744 }
1745
1746 HBasicBlock* block = got->GetBlock();
1747 HInstruction* previous = got->GetPrevious();
1748
1749 HLoopInformation* info = block->GetLoopInformation();
1750 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1751 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1752 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1753 return;
1754 }
1755
1756 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1757 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1758 }
1759 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1760 __ jmp(codegen_->GetLabelOf(successor));
1761 }
1762 }
1763
1764 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1765 got->SetLocations(nullptr);
1766 }
1767
1768 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1769 HandleGoto(got, got->GetSuccessor());
1770 }
1771
1772 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1773 try_boundary->SetLocations(nullptr);
1774 }
1775
1776 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1777 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1778 if (!successor->IsExitBlock()) {
1779 HandleGoto(try_boundary, successor);
1780 }
1781 }
1782
1783 void LocationsBuilderX86::VisitExit(HExit* exit) {
1784 exit->SetLocations(nullptr);
1785 }
1786
1787 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1788 }
1789
1790 template<class LabelType>
1791 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1792 LabelType* true_label,
1793 LabelType* false_label) {
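// ucomiss/ucomisd report an unordered result (a NaN operand) via the parity flag, which
// kUnordered tests. Route the NaN case to the proper target before testing the ordinary
// (ordered) condition.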
1794 if (cond->IsFPConditionTrueIfNaN()) {
1795 __ j(kUnordered, true_label);
1796 } else if (cond->IsFPConditionFalseIfNaN()) {
1797 __ j(kUnordered, false_label);
1798 }
1799 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1800 }
1801
1802 template<class LabelType>
1803 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1804 LabelType* true_label,
1805 LabelType* false_label) {
1806 LocationSummary* locations = cond->GetLocations();
1807 Location left = locations->InAt(0);
1808 Location right = locations->InAt(1);
1809 IfCondition if_cond = cond->GetCondition();
1810
1811 Register left_high = left.AsRegisterPairHigh<Register>();
1812 Register left_low = left.AsRegisterPairLow<Register>();
1813 IfCondition true_high_cond = if_cond;
1814 IfCondition false_high_cond = cond->GetOppositeCondition();
1815 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1816
1817 // Set the conditions for the test, remembering that == needs to be
1818 // decided using the low words.
1819 switch (if_cond) {
1820 case kCondEQ:
1821 case kCondNE:
1822 // Nothing to do.
1823 break;
1824 case kCondLT:
1825 false_high_cond = kCondGT;
1826 break;
1827 case kCondLE:
1828 true_high_cond = kCondLT;
1829 break;
1830 case kCondGT:
1831 false_high_cond = kCondLT;
1832 break;
1833 case kCondGE:
1834 true_high_cond = kCondGT;
1835 break;
1836 case kCondB:
1837 false_high_cond = kCondA;
1838 break;
1839 case kCondBE:
1840 true_high_cond = kCondB;
1841 break;
1842 case kCondA:
1843 false_high_cond = kCondB;
1844 break;
1845 case kCondAE:
1846 true_high_cond = kCondA;
1847 break;
1848 }
1849
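// The high words carry the sign, so they are compared with the signed (or, for
// Below/Above, unsigned) condition. Only when the high words are equal is the result
// decided by the low words, which are always compared as unsigned values.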
1850 if (right.IsConstant()) {
1851 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1852 int32_t val_high = High32Bits(value);
1853 int32_t val_low = Low32Bits(value);
1854
1855 codegen_->Compare32BitValue(left_high, val_high);
1856 if (if_cond == kCondNE) {
1857 __ j(X86Condition(true_high_cond), true_label);
1858 } else if (if_cond == kCondEQ) {
1859 __ j(X86Condition(false_high_cond), false_label);
1860 } else {
1861 __ j(X86Condition(true_high_cond), true_label);
1862 __ j(X86Condition(false_high_cond), false_label);
1863 }
1864 // Must be equal high, so compare the lows.
1865 codegen_->Compare32BitValue(left_low, val_low);
1866 } else if (right.IsRegisterPair()) {
1867 Register right_high = right.AsRegisterPairHigh<Register>();
1868 Register right_low = right.AsRegisterPairLow<Register>();
1869
1870 __ cmpl(left_high, right_high);
1871 if (if_cond == kCondNE) {
1872 __ j(X86Condition(true_high_cond), true_label);
1873 } else if (if_cond == kCondEQ) {
1874 __ j(X86Condition(false_high_cond), false_label);
1875 } else {
1876 __ j(X86Condition(true_high_cond), true_label);
1877 __ j(X86Condition(false_high_cond), false_label);
1878 }
1879 // Must be equal high, so compare the lows.
1880 __ cmpl(left_low, right_low);
1881 } else {
1882 DCHECK(right.IsDoubleStackSlot());
1883 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1884 if (if_cond == kCondNE) {
1885 __ j(X86Condition(true_high_cond), true_label);
1886 } else if (if_cond == kCondEQ) {
1887 __ j(X86Condition(false_high_cond), false_label);
1888 } else {
1889 __ j(X86Condition(true_high_cond), true_label);
1890 __ j(X86Condition(false_high_cond), false_label);
1891 }
1892 // Must be equal high, so compare the lows.
1893 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1894 }
1895 // The last comparison might be unsigned.
1896 __ j(final_condition, true_label);
1897 }
1898
1899 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1900 Location rhs,
1901 HInstruction* insn,
1902 bool is_double) {
1903 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1904 if (is_double) {
1905 if (rhs.IsFpuRegister()) {
1906 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1907 } else if (const_area != nullptr) {
1908 DCHECK(const_area->IsEmittedAtUseSite());
1909 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1910 codegen_->LiteralDoubleAddress(
1911 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1912 const_area->GetBaseMethodAddress(),
1913 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1914 } else {
1915 DCHECK(rhs.IsDoubleStackSlot());
1916 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1917 }
1918 } else {
1919 if (rhs.IsFpuRegister()) {
1920 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1921 } else if (const_area != nullptr) {
1922 DCHECK(const_area->IsEmittedAtUseSite());
1923 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1924 codegen_->LiteralFloatAddress(
1925 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1926 const_area->GetBaseMethodAddress(),
1927 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1928 } else {
1929 DCHECK(rhs.IsStackSlot());
1930 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1931 }
1932 }
1933 }
1934
1935 template<class LabelType>
1936 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1937 LabelType* true_target_in,
1938 LabelType* false_target_in) {
1939 // Generated branching requires both targets to be explicit. If either of the
1940 // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
1941 LabelType fallthrough_target;
1942 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1943 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1944
1945 LocationSummary* locations = condition->GetLocations();
1946 Location left = locations->InAt(0);
1947 Location right = locations->InAt(1);
1948
1949 DataType::Type type = condition->InputAt(0)->GetType();
1950 switch (type) {
1951 case DataType::Type::kInt64:
1952 GenerateLongComparesAndJumps(condition, true_target, false_target);
1953 break;
1954 case DataType::Type::kFloat32:
1955 GenerateFPCompare(left, right, condition, false);
1956 GenerateFPJumps(condition, true_target, false_target);
1957 break;
1958 case DataType::Type::kFloat64:
1959 GenerateFPCompare(left, right, condition, true);
1960 GenerateFPJumps(condition, true_target, false_target);
1961 break;
1962 default:
1963 LOG(FATAL) << "Unexpected compare type " << type;
1964 }
1965
1966 if (false_target != &fallthrough_target) {
1967 __ jmp(false_target);
1968 }
1969
1970 if (fallthrough_target.IsLinked()) {
1971 __ Bind(&fallthrough_target);
1972 }
1973 }
1974
1975 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1976 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1977 // are set only strictly before `branch`. We can't use the eflags on long/FP
1978 // conditions if they are materialized due to the complex branching.
1979 return cond->IsCondition() &&
1980 cond->GetNext() == branch &&
1981 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1982 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1983 }
1984
1985 template<class LabelType>
1986 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1987 size_t condition_input_index,
1988 LabelType* true_target,
1989 LabelType* false_target) {
1990 HInstruction* cond = instruction->InputAt(condition_input_index);
1991
1992 if (true_target == nullptr && false_target == nullptr) {
1993 // Nothing to do. The code always falls through.
1994 return;
1995 } else if (cond->IsIntConstant()) {
1996 // Constant condition, statically compared against "true" (integer value 1).
1997 if (cond->AsIntConstant()->IsTrue()) {
1998 if (true_target != nullptr) {
1999 __ jmp(true_target);
2000 }
2001 } else {
2002 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2003 if (false_target != nullptr) {
2004 __ jmp(false_target);
2005 }
2006 }
2007 return;
2008 }
2009
2010 // The following code generates these patterns:
2011 // (1) true_target == nullptr && false_target != nullptr
2012 // - opposite condition true => branch to false_target
2013 // (2) true_target != nullptr && false_target == nullptr
2014 // - condition true => branch to true_target
2015 // (3) true_target != nullptr && false_target != nullptr
2016 // - condition true => branch to true_target
2017 // - branch to false_target
2018 if (IsBooleanValueOrMaterializedCondition(cond)) {
2019 if (AreEflagsSetFrom(cond, instruction)) {
2020 if (true_target == nullptr) {
2021 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2022 } else {
2023 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2024 }
2025 } else {
2026 // Materialized condition, compare against 0.
2027 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2028 if (lhs.IsRegister()) {
2029 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2030 } else {
2031 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2032 }
2033 if (true_target == nullptr) {
2034 __ j(kEqual, false_target);
2035 } else {
2036 __ j(kNotEqual, true_target);
2037 }
2038 }
2039 } else {
2040 // Condition has not been materialized, use its inputs as the comparison and
2041 // its condition as the branch condition.
2042 HCondition* condition = cond->AsCondition();
2043
2044 // If this is a long or FP comparison that has been folded into
2045 // the HCondition, generate the comparison directly.
2046 DataType::Type type = condition->InputAt(0)->GetType();
2047 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2048 GenerateCompareTestAndBranch(condition, true_target, false_target);
2049 return;
2050 }
2051
2052 Location lhs = condition->GetLocations()->InAt(0);
2053 Location rhs = condition->GetLocations()->InAt(1);
2054 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2055 codegen_->GenerateIntCompare(lhs, rhs);
2056 if (true_target == nullptr) {
2057 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2058 } else {
2059 __ j(X86Condition(condition->GetCondition()), true_target);
2060 }
2061 }
2062
2063 // If neither branch falls through (case 3), the conditional branch to `true_target`
2064 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2065 if (true_target != nullptr && false_target != nullptr) {
2066 __ jmp(false_target);
2067 }
2068 }
2069
2070 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2071 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2072 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2073 locations->SetInAt(0, Location::Any());
2074 }
2075 }
2076
2077 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2078 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2079 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2080 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2081 nullptr : codegen_->GetLabelOf(true_successor);
2082 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2083 nullptr : codegen_->GetLabelOf(false_successor);
2084 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2085 }
2086
2087 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2088 LocationSummary* locations = new (GetGraph()->GetAllocator())
2089 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2090 InvokeRuntimeCallingConvention calling_convention;
2091 RegisterSet caller_saves = RegisterSet::Empty();
2092 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2093 locations->SetCustomSlowPathCallerSaves(caller_saves);
2094 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2095 locations->SetInAt(0, Location::Any());
2096 }
2097 }
2098
2099 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2100 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2101 GenerateTestAndBranch<Label>(deoptimize,
2102 /* condition_input_index= */ 0,
2103 slow_path->GetEntryLabel(),
2104 /* false_target= */ nullptr);
2105 }
2106
2107 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2108 LocationSummary* locations = new (GetGraph()->GetAllocator())
2109 LocationSummary(flag, LocationSummary::kNoCall);
2110 locations->SetOut(Location::RequiresRegister());
2111 }
2112
2113 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2114 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2115 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2116 }
2117
2118 static bool SelectCanUseCMOV(HSelect* select) {
2119 // There are no conditional move instructions for XMMs.
2120 if (DataType::IsFloatingPointType(select->GetType())) {
2121 return false;
2122 }
2123
2124 // A FP condition doesn't generate the single CC that we need.
2125 // In 32 bit mode, a long condition doesn't generate a single CC either.
2126 HInstruction* condition = select->GetCondition();
2127 if (condition->IsCondition()) {
2128 DataType::Type compare_type = condition->InputAt(0)->GetType();
2129 if (compare_type == DataType::Type::kInt64 ||
2130 DataType::IsFloatingPointType(compare_type)) {
2131 return false;
2132 }
2133 }
2134
2135 // We can generate a CMOV for this Select.
2136 return true;
2137 }
2138
2139 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2140 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2141 if (DataType::IsFloatingPointType(select->GetType())) {
2142 locations->SetInAt(0, Location::RequiresFpuRegister());
2143 locations->SetInAt(1, Location::Any());
2144 } else {
2145 locations->SetInAt(0, Location::RequiresRegister());
2146 if (SelectCanUseCMOV(select)) {
2147 if (select->InputAt(1)->IsConstant()) {
2148 // Cmov can't handle a constant value.
2149 locations->SetInAt(1, Location::RequiresRegister());
2150 } else {
2151 locations->SetInAt(1, Location::Any());
2152 }
2153 } else {
2154 locations->SetInAt(1, Location::Any());
2155 }
2156 }
2157 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2158 locations->SetInAt(2, Location::RequiresRegister());
2159 }
2160 locations->SetOut(Location::SameAsFirstInput());
2161 }
2162
2163 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2164 LocationSummary* locations = select->GetLocations();
2165 DCHECK(locations->InAt(0).Equals(locations->Out()));
2166 if (SelectCanUseCMOV(select)) {
2167 // If both the condition and the source types are integer, we can generate
2168 // a CMOV to implement Select.
2169
2170 HInstruction* select_condition = select->GetCondition();
2171 Condition cond = kNotEqual;
2172
2173 // Figure out how to test the 'condition'.
2174 if (select_condition->IsCondition()) {
2175 HCondition* condition = select_condition->AsCondition();
2176 if (!condition->IsEmittedAtUseSite()) {
2177 // This was a previously materialized condition.
2178 // Can we use the existing condition code?
2179 if (AreEflagsSetFrom(condition, select)) {
2180 // Materialization was the previous instruction. Condition codes are right.
2181 cond = X86Condition(condition->GetCondition());
2182 } else {
2183 // No, we have to recreate the condition code.
2184 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2185 __ testl(cond_reg, cond_reg);
2186 }
2187 } else {
2188 // We can't handle FP or long here.
2189 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2190 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2191 LocationSummary* cond_locations = condition->GetLocations();
2192 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2193 cond = X86Condition(condition->GetCondition());
2194 }
2195 } else {
2196 // Must be a Boolean condition, which needs to be compared to 0.
2197 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2198 __ testl(cond_reg, cond_reg);
2199 }
2200
2201 // If the condition is true, overwrite the output, which already contains false.
2202 Location false_loc = locations->InAt(0);
2203 Location true_loc = locations->InAt(1);
2204 if (select->GetType() == DataType::Type::kInt64) {
2205 // 64 bit conditional move.
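// There is no 64-bit cmov on x86-32, so emit one 32-bit cmov per half; cmov does not
// modify EFLAGS, so both halves can test the same condition.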
2206 Register false_high = false_loc.AsRegisterPairHigh<Register>();
2207 Register false_low = false_loc.AsRegisterPairLow<Register>();
2208 if (true_loc.IsRegisterPair()) {
2209 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2210 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2211 } else {
2212 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2213 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2214 }
2215 } else {
2216 // 32 bit conditional move.
2217 Register false_reg = false_loc.AsRegister<Register>();
2218 if (true_loc.IsRegister()) {
2219 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2220 } else {
2221 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2222 }
2223 }
2224 } else {
2225 NearLabel false_target;
2226 GenerateTestAndBranch<NearLabel>(
2227 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2228 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2229 __ Bind(&false_target);
2230 }
2231 }
2232
2233 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2234 new (GetGraph()->GetAllocator()) LocationSummary(info);
2235 }
2236
2237 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
2238 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2239 }
2240
2241 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2242 __ subl(ESP, Immediate(adjustment));
2243 __ cfi().AdjustCFAOffset(adjustment);
2244 }
2245
2246 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2247 __ addl(ESP, Immediate(adjustment));
2248 __ cfi().AdjustCFAOffset(-adjustment);
2249 }
2250
2251 void CodeGeneratorX86::GenerateNop() {
2252 __ nop();
2253 }
2254
2255 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2256 LocationSummary* locations =
2257 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2258 // Handle the long/FP comparisons made in instruction simplification.
2259 switch (cond->InputAt(0)->GetType()) {
2260 case DataType::Type::kInt64: {
2261 locations->SetInAt(0, Location::RequiresRegister());
2262 locations->SetInAt(1, Location::Any());
2263 if (!cond->IsEmittedAtUseSite()) {
2264 locations->SetOut(Location::RequiresRegister());
2265 }
2266 break;
2267 }
2268 case DataType::Type::kFloat32:
2269 case DataType::Type::kFloat64: {
2270 locations->SetInAt(0, Location::RequiresFpuRegister());
2271 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2272 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2273 } else if (cond->InputAt(1)->IsConstant()) {
2274 locations->SetInAt(1, Location::RequiresFpuRegister());
2275 } else {
2276 locations->SetInAt(1, Location::Any());
2277 }
2278 if (!cond->IsEmittedAtUseSite()) {
2279 locations->SetOut(Location::RequiresRegister());
2280 }
2281 break;
2282 }
2283 default:
2284 locations->SetInAt(0, Location::RequiresRegister());
2285 locations->SetInAt(1, Location::Any());
2286 if (!cond->IsEmittedAtUseSite()) {
2287 // We need a byte register.
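// (setcc can only write an 8-bit register such as AL, CL, DL or BL, so the output is
// pinned to ECX here.)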
2288 locations->SetOut(Location::RegisterLocation(ECX));
2289 }
2290 break;
2291 }
2292 }
2293
2294 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2295 if (cond->IsEmittedAtUseSite()) {
2296 return;
2297 }
2298
2299 LocationSummary* locations = cond->GetLocations();
2300 Location lhs = locations->InAt(0);
2301 Location rhs = locations->InAt(1);
2302 Register reg = locations->Out().AsRegister<Register>();
2303 NearLabel true_label, false_label;
2304
2305 switch (cond->InputAt(0)->GetType()) {
2306 default: {
2307 // Integer case.
2308
2309 // Clear output register: setb only sets the low byte.
2310 __ xorl(reg, reg);
2311 codegen_->GenerateIntCompare(lhs, rhs);
2312 __ setb(X86Condition(cond->GetCondition()), reg);
2313 return;
2314 }
2315 case DataType::Type::kInt64:
2316 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2317 break;
2318 case DataType::Type::kFloat32:
2319 GenerateFPCompare(lhs, rhs, cond, false);
2320 GenerateFPJumps(cond, &true_label, &false_label);
2321 break;
2322 case DataType::Type::kFloat64:
2323 GenerateFPCompare(lhs, rhs, cond, true);
2324 GenerateFPJumps(cond, &true_label, &false_label);
2325 break;
2326 }
2327
2328 // Convert the jumps into the result.
2329 NearLabel done_label;
2330
2331 // False case: result = 0.
2332 __ Bind(&false_label);
2333 __ xorl(reg, reg);
2334 __ jmp(&done_label);
2335
2336 // True case: result = 1.
2337 __ Bind(&true_label);
2338 __ movl(reg, Immediate(1));
2339 __ Bind(&done_label);
2340 }
2341
2342 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2343 HandleCondition(comp);
2344 }
2345
2346 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2347 HandleCondition(comp);
2348 }
2349
2350 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2351 HandleCondition(comp);
2352 }
2353
2354 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2355 HandleCondition(comp);
2356 }
2357
2358 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2359 HandleCondition(comp);
2360 }
2361
2362 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2363 HandleCondition(comp);
2364 }
2365
2366 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2367 HandleCondition(comp);
2368 }
2369
2370 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2371 HandleCondition(comp);
2372 }
2373
2374 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2375 HandleCondition(comp);
2376 }
2377
2378 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2379 HandleCondition(comp);
2380 }
2381
2382 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2383 HandleCondition(comp);
2384 }
2385
2386 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2387 HandleCondition(comp);
2388 }
2389
2390 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2391 HandleCondition(comp);
2392 }
2393
2394 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2395 HandleCondition(comp);
2396 }
2397
2398 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2399 HandleCondition(comp);
2400 }
2401
2402 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2403 HandleCondition(comp);
2404 }
2405
2406 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2407 HandleCondition(comp);
2408 }
2409
2410 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2411 HandleCondition(comp);
2412 }
2413
2414 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2415 HandleCondition(comp);
2416 }
2417
2418 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2419 HandleCondition(comp);
2420 }
2421
2422 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2423 LocationSummary* locations =
2424 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2425 locations->SetOut(Location::ConstantLocation(constant));
2426 }
2427
2428 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2429 // Will be generated at use site.
2430 }
2431
2432 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2433 LocationSummary* locations =
2434 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2435 locations->SetOut(Location::ConstantLocation(constant));
2436 }
2437
2438 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2439 // Will be generated at use site.
2440 }
2441
2442 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2443 LocationSummary* locations =
2444 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2445 locations->SetOut(Location::ConstantLocation(constant));
2446 }
2447
2448 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2449 // Will be generated at use site.
2450 }
2451
2452 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2453 LocationSummary* locations =
2454 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2455 locations->SetOut(Location::ConstantLocation(constant));
2456 }
2457
2458 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2459 // Will be generated at use site.
2460 }
2461
2462 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2463 LocationSummary* locations =
2464 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2465 locations->SetOut(Location::ConstantLocation(constant));
2466 }
2467
2468 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2469 // Will be generated at use site.
2470 }
2471
2472 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2473 constructor_fence->SetLocations(nullptr);
2474 }
2475
2476 void InstructionCodeGeneratorX86::VisitConstructorFence(
2477 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2478 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2479 }
2480
2481 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2482 memory_barrier->SetLocations(nullptr);
2483 }
2484
2485 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2486 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2487 }
2488
2489 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2490 ret->SetLocations(nullptr);
2491 }
2492
2493 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2494 codegen_->GenerateFrameExit();
2495 }
2496
2497 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2498 LocationSummary* locations =
2499 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2500 SetInForReturnValue(ret, locations);
2501 }
2502
2503 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2504 switch (ret->InputAt(0)->GetType()) {
2505 case DataType::Type::kReference:
2506 case DataType::Type::kBool:
2507 case DataType::Type::kUint8:
2508 case DataType::Type::kInt8:
2509 case DataType::Type::kUint16:
2510 case DataType::Type::kInt16:
2511 case DataType::Type::kInt32:
2512 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2513 break;
2514
2515 case DataType::Type::kInt64:
2516 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2517 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2518 break;
2519
2520 case DataType::Type::kFloat32:
2521 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2522 if (GetGraph()->IsCompilingOsr()) {
2523 // To simplify callers of an OSR method, we put the return value in both
2524 // floating point and core registers.
2525 __ movd(EAX, XMM0);
2526 }
2527 break;
2528
2529 case DataType::Type::kFloat64:
2530 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2531 if (GetGraph()->IsCompilingOsr()) {
2532 // To simplify callers of an OSR method, we put the return value in both
2533 // floating point and core registers.
2534 __ movd(EAX, XMM0);
2535 // Use XMM1 as temporary register to not clobber XMM0.
2536 __ movaps(XMM1, XMM0);
2537 __ psrlq(XMM1, Immediate(32));
2538 __ movd(EDX, XMM1);
2539 }
2540 break;
2541
2542 default:
2543 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2544 }
2545 codegen_->GenerateFrameExit();
2546 }
2547
2548 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2549 // The trampoline uses the same calling convention as dex calling conventions,
2550 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2551 // the method_idx.
2552 HandleInvoke(invoke);
2553 }
2554
2555 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2556 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2557 }
2558
2559 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2560 // Explicit clinit checks triggered by static invokes must have been pruned by
2561 // art::PrepareForRegisterAllocation.
2562 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2563
2564 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2565 if (intrinsic.TryDispatch(invoke)) {
2566 if (invoke->GetLocations()->CanCall() &&
2567 invoke->HasPcRelativeMethodLoadKind() &&
2568 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2569 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2570 }
2571 return;
2572 }
2573
2574 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2575 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2576 /*for_register_allocation=*/ true);
2577 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2578 } else {
2579 HandleInvoke(invoke);
2580 }
2581
2582 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2583 if (invoke->HasPcRelativeMethodLoadKind()) {
2584 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2585 }
2586 }
2587
2588 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2589 if (invoke->GetLocations()->Intrinsified()) {
2590 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2591 intrinsic.Dispatch(invoke);
2592 return true;
2593 }
2594 return false;
2595 }
2596
2597 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2598 // Explicit clinit checks triggered by static invokes must have been pruned by
2599 // art::PrepareForRegisterAllocation.
2600 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2601
2602 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2603 return;
2604 }
2605
2606 LocationSummary* locations = invoke->GetLocations();
2607 codegen_->GenerateStaticOrDirectCall(
2608 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2609 }
2610
2611 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2612 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2613 if (intrinsic.TryDispatch(invoke)) {
2614 return;
2615 }
2616
2617 HandleInvoke(invoke);
2618
2619 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2620 // Add one temporary for inline cache update.
2621 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2622 }
2623 }
2624
2625 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2626 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2627 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2628 }
2629
2630 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2631 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2632 return;
2633 }
2634
2635 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2636 DCHECK(!codegen_->IsLeafMethod());
2637 }
2638
2639 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2640 // This call to HandleInvoke allocates a temporary (core) register
2641 // which is also used to transfer the hidden argument from FP to
2642 // core register.
2643 HandleInvoke(invoke);
2644 // Add the hidden argument.
2645 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2646
2647 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2648 // Add one temporary for inline cache update.
2649 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2650 }
2651
2652 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2653 if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2654 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2655 }
2656
2657 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2658 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2659 Location::RequiresRegister());
2660 }
2661 }
2662
2663 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2664 DCHECK_EQ(EAX, klass);
2665 // The call target of an intrinsic is known statically, so there is no need to
2666 // record an inline cache (the intrinsic locations builder also does not request
2667 // the extra temporary used for the cache update).
2668 if (!instruction->GetLocations()->Intrinsified() &&
2669 GetGraph()->IsCompilingBaseline() &&
2670 !Runtime::Current()->IsAotCompiler()) {
2671 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2672 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2673 DCHECK(info != nullptr);
2674 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2675 uint32_t address = reinterpret_cast32<uint32_t>(cache);
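// The InlineCache is allocated by the JIT and keeps a fixed address for the lifetime of
// this compiled code, so its address can be embedded as an immediate.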
2676 if (kIsDebugBuild) {
2677 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2678 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2679 }
2680 Register temp = EBP;
2681 NearLabel done;
2682 __ movl(temp, Immediate(address));
2683 // Fast path for a monomorphic cache.
2684 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2685 __ j(kEqual, &done);
2686 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2687 __ Bind(&done);
2688 }
2689 }
2690
2691 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2692 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2693 LocationSummary* locations = invoke->GetLocations();
2694 Register temp = locations->GetTemp(0).AsRegister<Register>();
2695 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2696 Location receiver = locations->InAt(0);
2697 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2698
2699 // Set the hidden argument. It is safe to do this here, as XMM7
2700 // won't be modified thereafter, before the `call` instruction.
2701 DCHECK_EQ(XMM7, hidden_reg);
2702 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2703 __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2704 } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2705 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2706 __ movd(hidden_reg, temp);
2707 }
2708
2709 if (receiver.IsStackSlot()) {
2710 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2711 // /* HeapReference<Class> */ temp = temp->klass_
2712 __ movl(temp, Address(temp, class_offset));
2713 } else {
2714 // /* HeapReference<Class> */ temp = receiver->klass_
2715 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2716 }
2717 codegen_->MaybeRecordImplicitNullCheck(invoke);
2718 // Instead of simply (possibly) unpoisoning `temp` here, we should
2719 // emit a read barrier for the previous class reference load.
2720 // However this is not required in practice, as this is an
2721 // intermediate/temporary reference and because the current
2722 // concurrent copying collector keeps the from-space memory
2723 // intact/accessible until the end of the marking phase (future
2724 // versions of the collector may not).
2725 __ MaybeUnpoisonHeapReference(temp);
2726
2727 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2728
2729 // temp = temp->GetAddressOfIMT()
2730 __ movl(temp,
2731 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2732 // temp = temp->GetImtEntryAt(method_offset);
2733 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2734 invoke->GetImtIndex(), kX86PointerSize));
2735 __ movl(temp, Address(temp, method_offset));
2736 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2737 // We pass the method from the IMT in case of a conflict. This will ensure
2738 // we go into the runtime to resolve the actual method.
2739 __ movd(hidden_reg, temp);
2740 }
2741 // call temp->GetEntryPoint();
2742 __ call(Address(temp,
2743 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2744
2745 DCHECK(!codegen_->IsLeafMethod());
2746 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2747 }
2748
2749 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2750 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2751 if (intrinsic.TryDispatch(invoke)) {
2752 return;
2753 }
2754 HandleInvoke(invoke);
2755 }
2756
2757 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2758 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2759 return;
2760 }
2761 codegen_->GenerateInvokePolymorphicCall(invoke);
2762 }
2763
2764 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2765 HandleInvoke(invoke);
2766 }
2767
2768 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2769 codegen_->GenerateInvokeCustomCall(invoke);
2770 }
2771
2772 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2773 LocationSummary* locations =
2774 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2775 switch (neg->GetResultType()) {
2776 case DataType::Type::kInt32:
2777 case DataType::Type::kInt64:
2778 locations->SetInAt(0, Location::RequiresRegister());
2779 locations->SetOut(Location::SameAsFirstInput());
2780 break;
2781
2782 case DataType::Type::kFloat32:
2783 locations->SetInAt(0, Location::RequiresFpuRegister());
2784 locations->SetOut(Location::SameAsFirstInput());
2785 locations->AddTemp(Location::RequiresRegister());
2786 locations->AddTemp(Location::RequiresFpuRegister());
2787 break;
2788
2789 case DataType::Type::kFloat64:
2790 locations->SetInAt(0, Location::RequiresFpuRegister());
2791 locations->SetOut(Location::SameAsFirstInput());
2792 locations->AddTemp(Location::RequiresFpuRegister());
2793 break;
2794
2795 default:
2796 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2797 }
2798 }
2799
2800 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2801 LocationSummary* locations = neg->GetLocations();
2802 Location out = locations->Out();
2803 Location in = locations->InAt(0);
2804 switch (neg->GetResultType()) {
2805 case DataType::Type::kInt32:
2806 DCHECK(in.IsRegister());
2807 DCHECK(in.Equals(out));
2808 __ negl(out.AsRegister<Register>());
2809 break;
2810
2811 case DataType::Type::kInt64:
2812 DCHECK(in.IsRegisterPair());
2813 DCHECK(in.Equals(out));
2814 __ negl(out.AsRegisterPairLow<Register>());
2815 // Negation is similar to subtraction from zero. The least significant
2816 // 32-bit half triggers a borrow when it is non-zero; to take it into
2817 // account, add 1 to the most significant half if the carry flag (CF)
2818 // is set to 1 after the first NEGL operation, then negate the most
2819 // significant half.
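// Worked example: negating 0x00000000'00000001 (1): NEGL on the low half
// yields 0xFFFFFFFF and sets CF; ADCL adds the carry to the high half
// (0 -> 1); the final NEGL turns it into 0xFFFFFFFF, i.e. -1 overall.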
2820 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2821 __ negl(out.AsRegisterPairHigh<Register>());
2822 break;
2823
2824 case DataType::Type::kFloat32: {
2825 DCHECK(in.Equals(out));
2826 Register constant = locations->GetTemp(0).AsRegister<Register>();
2827 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2828 // Implement float negation with an exclusive or with value
2829 // 0x80000000 (mask for bit 31, representing the sign of a
2830 // single-precision floating-point number).
2831 __ movl(constant, Immediate(INT32_C(0x80000000)));
2832 __ movd(mask, constant);
2833 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2834 break;
2835 }
2836
2837 case DataType::Type::kFloat64: {
2838 DCHECK(in.Equals(out));
2839 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2840 // Implement double negation with an exclusive or with value
2841 // 0x8000000000000000 (mask for bit 63, representing the sign of
2842 // a double-precision floating-point number).
2843 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2844 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2845 break;
2846 }
2847
2848 default:
2849 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2850 }
2851 }
2852
2853 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2854 LocationSummary* locations =
2855 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2856 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2857 locations->SetInAt(0, Location::RequiresFpuRegister());
2858 locations->SetInAt(1, Location::RequiresRegister());
2859 locations->SetOut(Location::SameAsFirstInput());
2860 locations->AddTemp(Location::RequiresFpuRegister());
2861 }
2862
2863 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2864 LocationSummary* locations = neg->GetLocations();
2865 Location out = locations->Out();
2866 DCHECK(locations->InAt(0).Equals(out));
2867
2868 Register constant_area = locations->InAt(1).AsRegister<Register>();
2869 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2870 if (neg->GetType() == DataType::Type::kFloat32) {
2871 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2872 neg->GetBaseMethodAddress(),
2873 constant_area));
2874 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2875 } else {
2876 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2877 neg->GetBaseMethodAddress(),
2878 constant_area));
2879 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2880 }
2881 }
2882
2883 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2884 DataType::Type result_type = conversion->GetResultType();
2885 DataType::Type input_type = conversion->GetInputType();
2886 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2887 << input_type << " -> " << result_type;
2888
2889 // The float-to-long and double-to-long type conversions rely on a
2890 // call to the runtime.
2891 LocationSummary::CallKind call_kind =
2892 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2893 && result_type == DataType::Type::kInt64)
2894 ? LocationSummary::kCallOnMainOnly
2895 : LocationSummary::kNoCall;
2896 LocationSummary* locations =
2897 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2898
2899 switch (result_type) {
2900 case DataType::Type::kUint8:
2901 case DataType::Type::kInt8:
2902 switch (input_type) {
2903 case DataType::Type::kUint8:
2904 case DataType::Type::kInt8:
2905 case DataType::Type::kUint16:
2906 case DataType::Type::kInt16:
2907 case DataType::Type::kInt32:
2908 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2909 // Make the output overlap to please the register allocator. This greatly simplifies
2910 // the validation of the linear scan implementation.
2911 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2912 break;
2913 case DataType::Type::kInt64: {
2914 HInstruction* input = conversion->InputAt(0);
2915 Location input_location = input->IsConstant()
2916 ? Location::ConstantLocation(input->AsConstant())
2917 : Location::RegisterPairLocation(EAX, EDX);
2918 locations->SetInAt(0, input_location);
2919 // Make the output overlap to please the register allocator. This greatly simplifies
2920 // the validation of the linear scan implementation.
2921 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2922 break;
2923 }
2924
2925 default:
2926 LOG(FATAL) << "Unexpected type conversion from " << input_type
2927 << " to " << result_type;
2928 }
2929 break;
2930
2931 case DataType::Type::kUint16:
2932 case DataType::Type::kInt16:
2933 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2934 locations->SetInAt(0, Location::Any());
2935 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2936 break;
2937
2938 case DataType::Type::kInt32:
2939 switch (input_type) {
2940 case DataType::Type::kInt64:
2941 locations->SetInAt(0, Location::Any());
2942 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2943 break;
2944
2945 case DataType::Type::kFloat32:
2946 locations->SetInAt(0, Location::RequiresFpuRegister());
2947 locations->SetOut(Location::RequiresRegister());
2948 locations->AddTemp(Location::RequiresFpuRegister());
2949 break;
2950
2951 case DataType::Type::kFloat64:
2952 locations->SetInAt(0, Location::RequiresFpuRegister());
2953 locations->SetOut(Location::RequiresRegister());
2954 locations->AddTemp(Location::RequiresFpuRegister());
2955 break;
2956
2957 default:
2958 LOG(FATAL) << "Unexpected type conversion from " << input_type
2959 << " to " << result_type;
2960 }
2961 break;
2962
2963 case DataType::Type::kInt64:
2964 switch (input_type) {
2965 case DataType::Type::kBool:
2966 case DataType::Type::kUint8:
2967 case DataType::Type::kInt8:
2968 case DataType::Type::kUint16:
2969 case DataType::Type::kInt16:
2970 case DataType::Type::kInt32:
2971 locations->SetInAt(0, Location::RegisterLocation(EAX));
2972 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2973 break;
2974
2975 case DataType::Type::kFloat32:
2976 case DataType::Type::kFloat64: {
2977 InvokeRuntimeCallingConvention calling_convention;
2978 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2979 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2980
2981 // The runtime helper puts the result in EAX, EDX.
2982 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2983 }
2984 break;
2985
2986 default:
2987 LOG(FATAL) << "Unexpected type conversion from " << input_type
2988 << " to " << result_type;
2989 }
2990 break;
2991
2992 case DataType::Type::kFloat32:
2993 switch (input_type) {
2994 case DataType::Type::kBool:
2995 case DataType::Type::kUint8:
2996 case DataType::Type::kInt8:
2997 case DataType::Type::kUint16:
2998 case DataType::Type::kInt16:
2999 case DataType::Type::kInt32:
3000 locations->SetInAt(0, Location::RequiresRegister());
3001 locations->SetOut(Location::RequiresFpuRegister());
3002 break;
3003
3004 case DataType::Type::kInt64:
3005 locations->SetInAt(0, Location::Any());
3006 locations->SetOut(Location::Any());
3007 break;
3008
3009 case DataType::Type::kFloat64:
3010 locations->SetInAt(0, Location::RequiresFpuRegister());
3011 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3012 break;
3013
3014 default:
3015 LOG(FATAL) << "Unexpected type conversion from " << input_type
3016 << " to " << result_type;
3017 }
3018 break;
3019
3020 case DataType::Type::kFloat64:
3021 switch (input_type) {
3022 case DataType::Type::kBool:
3023 case DataType::Type::kUint8:
3024 case DataType::Type::kInt8:
3025 case DataType::Type::kUint16:
3026 case DataType::Type::kInt16:
3027 case DataType::Type::kInt32:
3028 locations->SetInAt(0, Location::RequiresRegister());
3029 locations->SetOut(Location::RequiresFpuRegister());
3030 break;
3031
3032 case DataType::Type::kInt64:
3033 locations->SetInAt(0, Location::Any());
3034 locations->SetOut(Location::Any());
3035 break;
3036
3037 case DataType::Type::kFloat32:
3038 locations->SetInAt(0, Location::RequiresFpuRegister());
3039 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3040 break;
3041
3042 default:
3043 LOG(FATAL) << "Unexpected type conversion from " << input_type
3044 << " to " << result_type;
3045 }
3046 break;
3047
3048 default:
3049 LOG(FATAL) << "Unexpected type conversion from " << input_type
3050 << " to " << result_type;
3051 }
3052 }
3053
3054 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3055 LocationSummary* locations = conversion->GetLocations();
3056 Location out = locations->Out();
3057 Location in = locations->InAt(0);
3058 DataType::Type result_type = conversion->GetResultType();
3059 DataType::Type input_type = conversion->GetInputType();
3060 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3061 << input_type << " -> " << result_type;
3062 switch (result_type) {
3063 case DataType::Type::kUint8:
3064 switch (input_type) {
3065 case DataType::Type::kInt8:
3066 case DataType::Type::kUint16:
3067 case DataType::Type::kInt16:
3068 case DataType::Type::kInt32:
3069 if (in.IsRegister()) {
3070 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3071 } else {
3072 DCHECK(in.GetConstant()->IsIntConstant());
3073 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3074 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3075 }
3076 break;
3077 case DataType::Type::kInt64:
3078 if (in.IsRegisterPair()) {
3079 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3080 } else {
3081 DCHECK(in.GetConstant()->IsLongConstant());
3082 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3083 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3084 }
3085 break;
3086
3087 default:
3088 LOG(FATAL) << "Unexpected type conversion from " << input_type
3089 << " to " << result_type;
3090 }
3091 break;
3092
3093 case DataType::Type::kInt8:
3094 switch (input_type) {
3095 case DataType::Type::kUint8:
3096 case DataType::Type::kUint16:
3097 case DataType::Type::kInt16:
3098 case DataType::Type::kInt32:
3099 if (in.IsRegister()) {
3100 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3101 } else {
3102 DCHECK(in.GetConstant()->IsIntConstant());
3103 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3104 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3105 }
3106 break;
3107 case DataType::Type::kInt64:
3108 if (in.IsRegisterPair()) {
3109 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3110 } else {
3111 DCHECK(in.GetConstant()->IsLongConstant());
3112 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3113 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3114 }
3115 break;
3116
3117 default:
3118 LOG(FATAL) << "Unexpected type conversion from " << input_type
3119 << " to " << result_type;
3120 }
3121 break;
3122
3123 case DataType::Type::kUint16:
3124 switch (input_type) {
3125 case DataType::Type::kInt8:
3126 case DataType::Type::kInt16:
3127 case DataType::Type::kInt32:
3128 if (in.IsRegister()) {
3129 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3130 } else if (in.IsStackSlot()) {
3131 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3132 } else {
3133 DCHECK(in.GetConstant()->IsIntConstant());
3134 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3135 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3136 }
3137 break;
3138 case DataType::Type::kInt64:
3139 if (in.IsRegisterPair()) {
3140 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3141 } else if (in.IsDoubleStackSlot()) {
3142 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3143 } else {
3144 DCHECK(in.GetConstant()->IsLongConstant());
3145 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3146 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3147 }
3148 break;
3149
3150 default:
3151 LOG(FATAL) << "Unexpected type conversion from " << input_type
3152 << " to " << result_type;
3153 }
3154 break;
3155
3156 case DataType::Type::kInt16:
3157 switch (input_type) {
3158 case DataType::Type::kUint16:
3159 case DataType::Type::kInt32:
3160 if (in.IsRegister()) {
3161 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3162 } else if (in.IsStackSlot()) {
3163 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3164 } else {
3165 DCHECK(in.GetConstant()->IsIntConstant());
3166 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3167 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3168 }
3169 break;
3170 case DataType::Type::kInt64:
3171 if (in.IsRegisterPair()) {
3172 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3173 } else if (in.IsDoubleStackSlot()) {
3174 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3175 } else {
3176 DCHECK(in.GetConstant()->IsLongConstant());
3177 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3178 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3179 }
3180 break;
3181
3182 default:
3183 LOG(FATAL) << "Unexpected type conversion from " << input_type
3184 << " to " << result_type;
3185 }
3186 break;
3187
3188 case DataType::Type::kInt32:
3189 switch (input_type) {
3190 case DataType::Type::kInt64:
3191 if (in.IsRegisterPair()) {
3192 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3193 } else if (in.IsDoubleStackSlot()) {
3194 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3195 } else {
3196 DCHECK(in.IsConstant());
3197 DCHECK(in.GetConstant()->IsLongConstant());
3198 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3199 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3200 }
3201 break;
3202
3203 case DataType::Type::kFloat32: {
3204 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3205 Register output = out.AsRegister<Register>();
3206 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3207 NearLabel done, nan;
3208
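// This implements the Java float-to-int semantics: too-large positive values
// clamp to kPrimIntMax, NaN becomes 0, and everything else truncates toward
// zero. Too-small values need no special case because CVTTSS2SI returns
// 0x80000000 (== INT32_MIN) for out-of-range inputs. The double-to-int case
// below follows the same pattern.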
3209 __ movl(output, Immediate(kPrimIntMax));
3210 // temp = int-to-float(output)
3211 __ cvtsi2ss(temp, output);
3212 // if input >= temp goto done
3213 __ comiss(input, temp);
3214 __ j(kAboveEqual, &done);
3215 // if input == NaN goto nan
3216 __ j(kUnordered, &nan);
3217 // output = float-to-int-truncate(input)
3218 __ cvttss2si(output, input);
3219 __ jmp(&done);
3220 __ Bind(&nan);
3221 // output = 0
3222 __ xorl(output, output);
3223 __ Bind(&done);
3224 break;
3225 }
3226
3227 case DataType::Type::kFloat64: {
3228 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3229 Register output = out.AsRegister<Register>();
3230 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3231 NearLabel done, nan;
3232
3233 __ movl(output, Immediate(kPrimIntMax));
3234 // temp = int-to-double(output)
3235 __ cvtsi2sd(temp, output);
3236 // if input >= temp goto done
3237 __ comisd(input, temp);
3238 __ j(kAboveEqual, &done);
3239 // if input == NaN goto nan
3240 __ j(kUnordered, &nan);
3241 // output = double-to-int-truncate(input)
3242 __ cvttsd2si(output, input);
3243 __ jmp(&done);
3244 __ Bind(&nan);
3245 // output = 0
3246 __ xorl(output, output);
3247 __ Bind(&done);
3248 break;
3249 }
3250
3251 default:
3252 LOG(FATAL) << "Unexpected type conversion from " << input_type
3253 << " to " << result_type;
3254 }
3255 break;
3256
3257 case DataType::Type::kInt64:
3258 switch (input_type) {
3259 case DataType::Type::kBool:
3260 case DataType::Type::kUint8:
3261 case DataType::Type::kInt8:
3262 case DataType::Type::kUint16:
3263 case DataType::Type::kInt16:
3264 case DataType::Type::kInt32:
3265 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3266 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3267 DCHECK_EQ(in.AsRegister<Register>(), EAX);
3268 __ cdq();
3269 break;
3270
3271 case DataType::Type::kFloat32:
3272 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3273 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3274 break;
3275
3276 case DataType::Type::kFloat64:
3277 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3278 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3279 break;
3280
3281 default:
3282 LOG(FATAL) << "Unexpected type conversion from " << input_type
3283 << " to " << result_type;
3284 }
3285 break;
3286
3287 case DataType::Type::kFloat32:
3288 switch (input_type) {
3289 case DataType::Type::kBool:
3290 case DataType::Type::kUint8:
3291 case DataType::Type::kInt8:
3292 case DataType::Type::kUint16:
3293 case DataType::Type::kInt16:
3294 case DataType::Type::kInt32:
3295 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3296 break;
3297
3298 case DataType::Type::kInt64: {
3299 size_t adjustment = 0;
3300
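// 32-bit SSE has no instruction converting a 64-bit integer to float, so the
// value is pushed onto the x87 FP stack with FILD (inside PushOntoFPStack)
// and stored back with FSTPS, which rounds it to single precision. The
// int64-to-double case below uses the same approach with FSTPL.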
3301 // Create stack space for the call to
3302 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3303 // TODO: enhance register allocator to ask for stack temporaries.
3304 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3305 adjustment = DataType::Size(DataType::Type::kInt64);
3306 codegen_->IncreaseFrame(adjustment);
3307 }
3308
3309 // Load the value to the FP stack, using temporaries if needed.
3310 PushOntoFPStack(in, 0, adjustment, false, true);
3311
3312 if (out.IsStackSlot()) {
3313 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3314 } else {
3315 __ fstps(Address(ESP, 0));
3316 Location stack_temp = Location::StackSlot(0);
3317 codegen_->Move32(out, stack_temp);
3318 }
3319
3320 // Remove the temporary stack space we allocated.
3321 if (adjustment != 0) {
3322 codegen_->DecreaseFrame(adjustment);
3323 }
3324 break;
3325 }
3326
3327 case DataType::Type::kFloat64:
3328 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3329 break;
3330
3331 default:
3332 LOG(FATAL) << "Unexpected type conversion from " << input_type
3333 << " to " << result_type;
3334 }
3335 break;
3336
3337 case DataType::Type::kFloat64:
3338 switch (input_type) {
3339 case DataType::Type::kBool:
3340 case DataType::Type::kUint8:
3341 case DataType::Type::kInt8:
3342 case DataType::Type::kUint16:
3343 case DataType::Type::kInt16:
3344 case DataType::Type::kInt32:
3345 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3346 break;
3347
3348 case DataType::Type::kInt64: {
3349 size_t adjustment = 0;
3350
3351 // Create stack space for the call to
3352 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3353 // TODO: enhance register allocator to ask for stack temporaries.
3354 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3355 adjustment = DataType::Size(DataType::Type::kInt64);
3356 codegen_->IncreaseFrame(adjustment);
3357 }
3358
3359 // Load the value to the FP stack, using temporaries if needed.
3360 PushOntoFPStack(in, 0, adjustment, false, true);
3361
3362 if (out.IsDoubleStackSlot()) {
3363 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3364 } else {
3365 __ fstpl(Address(ESP, 0));
3366 Location stack_temp = Location::DoubleStackSlot(0);
3367 codegen_->Move64(out, stack_temp);
3368 }
3369
3370 // Remove the temporary stack space we allocated.
3371 if (adjustment != 0) {
3372 codegen_->DecreaseFrame(adjustment);
3373 }
3374 break;
3375 }
3376
3377 case DataType::Type::kFloat32:
3378 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3379 break;
3380
3381 default:
3382 LOG(FATAL) << "Unexpected type conversion from " << input_type
3383 << " to " << result_type;
3384 }
3385 break;
3386
3387 default:
3388 LOG(FATAL) << "Unexpected type conversion from " << input_type
3389 << " to " << result_type;
3390 }
3391 }
3392
3393 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3394 LocationSummary* locations =
3395 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3396 switch (add->GetResultType()) {
3397 case DataType::Type::kInt32: {
3398 locations->SetInAt(0, Location::RequiresRegister());
3399 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3400 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3401 break;
3402 }
3403
3404 case DataType::Type::kInt64: {
3405 locations->SetInAt(0, Location::RequiresRegister());
3406 locations->SetInAt(1, Location::Any());
3407 locations->SetOut(Location::SameAsFirstInput());
3408 break;
3409 }
3410
3411 case DataType::Type::kFloat32:
3412 case DataType::Type::kFloat64: {
3413 locations->SetInAt(0, Location::RequiresFpuRegister());
3414 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3415 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3416 } else if (add->InputAt(1)->IsConstant()) {
3417 locations->SetInAt(1, Location::RequiresFpuRegister());
3418 } else {
3419 locations->SetInAt(1, Location::Any());
3420 }
3421 locations->SetOut(Location::SameAsFirstInput());
3422 break;
3423 }
3424
3425 default:
3426 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3427 UNREACHABLE();
3428 }
3429 }
3430
3431 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3432 LocationSummary* locations = add->GetLocations();
3433 Location first = locations->InAt(0);
3434 Location second = locations->InAt(1);
3435 Location out = locations->Out();
3436
3437 switch (add->GetResultType()) {
3438 case DataType::Type::kInt32: {
3439 if (second.IsRegister()) {
3440 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3441 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3442 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3443 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3444 } else {
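// The output register differs from both inputs; use LEA as a
// non-destructive three-operand add.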
3445 __ leal(out.AsRegister<Register>(), Address(
3446 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3447 }
3448 } else if (second.IsConstant()) {
3449 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3450 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3451 __ addl(out.AsRegister<Register>(), Immediate(value));
3452 } else {
3453 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3454 }
3455 } else {
3456 DCHECK(first.Equals(locations->Out()));
3457 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3458 }
3459 break;
3460 }
3461
3462 case DataType::Type::kInt64: {
3463 if (second.IsRegisterPair()) {
3464 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3465 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3466 } else if (second.IsDoubleStackSlot()) {
3467 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3468 __ adcl(first.AsRegisterPairHigh<Register>(),
3469 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3470 } else {
3471 DCHECK(second.IsConstant()) << second;
3472 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3473 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3474 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3475 }
3476 break;
3477 }
3478
3479 case DataType::Type::kFloat32: {
3480 if (second.IsFpuRegister()) {
3481 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3482 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3483 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3484 DCHECK(const_area->IsEmittedAtUseSite());
3485 __ addss(first.AsFpuRegister<XmmRegister>(),
3486 codegen_->LiteralFloatAddress(
3487 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3488 const_area->GetBaseMethodAddress(),
3489 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3490 } else {
3491 DCHECK(second.IsStackSlot());
3492 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3493 }
3494 break;
3495 }
3496
3497 case DataType::Type::kFloat64: {
3498 if (second.IsFpuRegister()) {
3499 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3500 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3501 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3502 DCHECK(const_area->IsEmittedAtUseSite());
3503 __ addsd(first.AsFpuRegister<XmmRegister>(),
3504 codegen_->LiteralDoubleAddress(
3505 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3506 const_area->GetBaseMethodAddress(),
3507 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3508 } else {
3509 DCHECK(second.IsDoubleStackSlot());
3510 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3511 }
3512 break;
3513 }
3514
3515 default:
3516 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3517 }
3518 }
3519
3520 void LocationsBuilderX86::VisitSub(HSub* sub) {
3521 LocationSummary* locations =
3522 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3523 switch (sub->GetResultType()) {
3524 case DataType::Type::kInt32:
3525 case DataType::Type::kInt64: {
3526 locations->SetInAt(0, Location::RequiresRegister());
3527 locations->SetInAt(1, Location::Any());
3528 locations->SetOut(Location::SameAsFirstInput());
3529 break;
3530 }
3531 case DataType::Type::kFloat32:
3532 case DataType::Type::kFloat64: {
3533 locations->SetInAt(0, Location::RequiresFpuRegister());
3534 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3535 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3536 } else if (sub->InputAt(1)->IsConstant()) {
3537 locations->SetInAt(1, Location::RequiresFpuRegister());
3538 } else {
3539 locations->SetInAt(1, Location::Any());
3540 }
3541 locations->SetOut(Location::SameAsFirstInput());
3542 break;
3543 }
3544
3545 default:
3546 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3547 }
3548 }
3549
3550 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3551 LocationSummary* locations = sub->GetLocations();
3552 Location first = locations->InAt(0);
3553 Location second = locations->InAt(1);
3554 DCHECK(first.Equals(locations->Out()));
3555 switch (sub->GetResultType()) {
3556 case DataType::Type::kInt32: {
3557 if (second.IsRegister()) {
3558 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3559 } else if (second.IsConstant()) {
3560 __ subl(first.AsRegister<Register>(),
3561 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3562 } else {
3563 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3564 }
3565 break;
3566 }
3567
3568 case DataType::Type::kInt64: {
3569 if (second.IsRegisterPair()) {
3570 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3571 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3572 } else if (second.IsDoubleStackSlot()) {
3573 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3574 __ sbbl(first.AsRegisterPairHigh<Register>(),
3575 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3576 } else {
3577 DCHECK(second.IsConstant()) << second;
3578 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3579 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3580 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3581 }
3582 break;
3583 }
3584
3585 case DataType::Type::kFloat32: {
3586 if (second.IsFpuRegister()) {
3587 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3588 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3589 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3590 DCHECK(const_area->IsEmittedAtUseSite());
3591 __ subss(first.AsFpuRegister<XmmRegister>(),
3592 codegen_->LiteralFloatAddress(
3593 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3594 const_area->GetBaseMethodAddress(),
3595 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3596 } else {
3597 DCHECK(second.IsStackSlot());
3598 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3599 }
3600 break;
3601 }
3602
3603 case DataType::Type::kFloat64: {
3604 if (second.IsFpuRegister()) {
3605 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3606 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3607 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3608 DCHECK(const_area->IsEmittedAtUseSite());
3609 __ subsd(first.AsFpuRegister<XmmRegister>(),
3610 codegen_->LiteralDoubleAddress(
3611 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3612 const_area->GetBaseMethodAddress(),
3613 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3614 } else {
3615 DCHECK(second.IsDoubleStackSlot());
3616 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3617 }
3618 break;
3619 }
3620
3621 default:
3622 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3623 }
3624 }
3625
3626 void LocationsBuilderX86::VisitMul(HMul* mul) {
3627 LocationSummary* locations =
3628 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3629 switch (mul->GetResultType()) {
3630 case DataType::Type::kInt32:
3631 locations->SetInAt(0, Location::RequiresRegister());
3632 locations->SetInAt(1, Location::Any());
3633 if (mul->InputAt(1)->IsIntConstant()) {
3634 // Can use 3 operand multiply.
3635 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3636 } else {
3637 locations->SetOut(Location::SameAsFirstInput());
3638 }
3639 break;
3640 case DataType::Type::kInt64: {
3641 locations->SetInAt(0, Location::RequiresRegister());
3642 locations->SetInAt(1, Location::Any());
3643 locations->SetOut(Location::SameAsFirstInput());
3644 // Needed for the 32-bit imul sequence that produces a 64-bit result.
3645 locations->AddTemp(Location::RegisterLocation(EAX));
3646 locations->AddTemp(Location::RegisterLocation(EDX));
3647 break;
3648 }
3649 case DataType::Type::kFloat32:
3650 case DataType::Type::kFloat64: {
3651 locations->SetInAt(0, Location::RequiresFpuRegister());
3652 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3653 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3654 } else if (mul->InputAt(1)->IsConstant()) {
3655 locations->SetInAt(1, Location::RequiresFpuRegister());
3656 } else {
3657 locations->SetInAt(1, Location::Any());
3658 }
3659 locations->SetOut(Location::SameAsFirstInput());
3660 break;
3661 }
3662
3663 default:
3664 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3665 }
3666 }
3667
3668 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3669 LocationSummary* locations = mul->GetLocations();
3670 Location first = locations->InAt(0);
3671 Location second = locations->InAt(1);
3672 Location out = locations->Out();
3673
3674 switch (mul->GetResultType()) {
3675 case DataType::Type::kInt32:
3676 // The constant may have ended up in a register, so test explicitly to avoid
3677 // problems where the output may not be the same as the first operand.
3678 if (mul->InputAt(1)->IsIntConstant()) {
3679 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3680 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3681 } else if (second.IsRegister()) {
3682 DCHECK(first.Equals(out));
3683 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3684 } else {
3685 DCHECK(second.IsStackSlot());
3686 DCHECK(first.Equals(out));
3687 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3688 }
3689 break;
3690
3691 case DataType::Type::kInt64: {
3692 Register in1_hi = first.AsRegisterPairHigh<Register>();
3693 Register in1_lo = first.AsRegisterPairLow<Register>();
3694 Register eax = locations->GetTemp(0).AsRegister<Register>();
3695 Register edx = locations->GetTemp(1).AsRegister<Register>();
3696
3697 DCHECK_EQ(EAX, eax);
3698 DCHECK_EQ(EDX, edx);
3699
3700 // input: in1 - 64 bits, in2 - 64 bits.
3701 // output: in1
3702 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3703 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3704 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
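// The three operand cases below (constant, register pair, double stack slot)
// emit the same multiply sequence; only the way in2 is addressed differs.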
3705 if (second.IsConstant()) {
3706 DCHECK(second.GetConstant()->IsLongConstant());
3707
3708 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3709 int32_t low_value = Low32Bits(value);
3710 int32_t high_value = High32Bits(value);
3711 Immediate low(low_value);
3712 Immediate high(high_value);
3713
3714 __ movl(eax, high);
3715 // eax <- in1.lo * in2.hi
3716 __ imull(eax, in1_lo);
3717 // in1.hi <- in1.hi * in2.lo
3718 __ imull(in1_hi, low);
3719 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3720 __ addl(in1_hi, eax);
3721 // move in2_lo to eax to prepare for the double-width multiply
3722 __ movl(eax, low);
3723 // edx:eax <- in1.lo * in2.lo
3724 __ mull(in1_lo);
3725 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3726 __ addl(in1_hi, edx);
3727 // in1.lo <- (in1.lo * in2.lo)[31:0];
3728 __ movl(in1_lo, eax);
3729 } else if (second.IsRegisterPair()) {
3730 Register in2_hi = second.AsRegisterPairHigh<Register>();
3731 Register in2_lo = second.AsRegisterPairLow<Register>();
3732
3733 __ movl(eax, in2_hi);
3734 // eax <- in1.lo * in2.hi
3735 __ imull(eax, in1_lo);
3736 // in1.hi <- in1.hi * in2.lo
3737 __ imull(in1_hi, in2_lo);
3738 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3739 __ addl(in1_hi, eax);
3740 // move in1_lo to eax to prepare for the double-width multiply
3741 __ movl(eax, in1_lo);
3742 // edx:eax <- in1.lo * in2.lo
3743 __ mull(in2_lo);
3744 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3745 __ addl(in1_hi, edx);
3746 // in1.lo <- (in1.lo * in2.lo)[31:0];
3747 __ movl(in1_lo, eax);
3748 } else {
3749 DCHECK(second.IsDoubleStackSlot()) << second;
3750 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3751 Address in2_lo(ESP, second.GetStackIndex());
3752
3753 __ movl(eax, in2_hi);
3754 // eax <- in1.lo * in2.hi
3755 __ imull(eax, in1_lo);
3756 // in1.hi <- in1.hi * in2.lo
3757 __ imull(in1_hi, in2_lo);
3758 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3759 __ addl(in1_hi, eax);
3760 // move in1_lo to eax to prepare for the double-width multiply
3761 __ movl(eax, in1_lo);
3762 // edx:eax <- in1.lo * in2.lo
3763 __ mull(in2_lo);
3764 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3765 __ addl(in1_hi, edx);
3766 // in1.lo <- (in1.lo * in2.lo)[31:0];
3767 __ movl(in1_lo, eax);
3768 }
3769
3770 break;
3771 }
3772
3773 case DataType::Type::kFloat32: {
3774 DCHECK(first.Equals(locations->Out()));
3775 if (second.IsFpuRegister()) {
3776 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3777 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3778 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3779 DCHECK(const_area->IsEmittedAtUseSite());
3780 __ mulss(first.AsFpuRegister<XmmRegister>(),
3781 codegen_->LiteralFloatAddress(
3782 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3783 const_area->GetBaseMethodAddress(),
3784 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3785 } else {
3786 DCHECK(second.IsStackSlot());
3787 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3788 }
3789 break;
3790 }
3791
3792 case DataType::Type::kFloat64: {
3793 DCHECK(first.Equals(locations->Out()));
3794 if (second.IsFpuRegister()) {
3795 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3796 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3797 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3798 DCHECK(const_area->IsEmittedAtUseSite());
3799 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3800 codegen_->LiteralDoubleAddress(
3801 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3802 const_area->GetBaseMethodAddress(),
3803 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3804 } else {
3805 DCHECK(second.IsDoubleStackSlot());
3806 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3807 }
3808 break;
3809 }
3810
3811 default:
3812 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3813 }
3814 }
3815
3816 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3817 uint32_t temp_offset,
3818 uint32_t stack_adjustment,
3819 bool is_fp,
3820 bool is_wide) {
3821 if (source.IsStackSlot()) {
3822 DCHECK(!is_wide);
3823 if (is_fp) {
3824 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3825 } else {
3826 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3827 }
3828 } else if (source.IsDoubleStackSlot()) {
3829 DCHECK(is_wide);
3830 if (is_fp) {
3831 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3832 } else {
3833 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3834 }
3835 } else {
3836 // Write the value to the temporary location on the stack and load to FP stack.
3837 if (!is_wide) {
3838 Location stack_temp = Location::StackSlot(temp_offset);
3839 codegen_->Move32(stack_temp, source);
3840 if (is_fp) {
3841 __ flds(Address(ESP, temp_offset));
3842 } else {
3843 __ filds(Address(ESP, temp_offset));
3844 }
3845 } else {
3846 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3847 codegen_->Move64(stack_temp, source);
3848 if (is_fp) {
3849 __ fldl(Address(ESP, temp_offset));
3850 } else {
3851 __ fildl(Address(ESP, temp_offset));
3852 }
3853 }
3854 }
3855 }
3856
GenerateRemFP(HRem * rem)3857 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3858 DataType::Type type = rem->GetResultType();
3859 bool is_float = type == DataType::Type::kFloat32;
3860 size_t elem_size = DataType::Size(type);
3861 LocationSummary* locations = rem->GetLocations();
3862 Location first = locations->InAt(0);
3863 Location second = locations->InAt(1);
3864 Location out = locations->Out();
3865
3866 // Create stack space for 2 elements.
3867 // TODO: enhance register allocator to ask for stack temporaries.
3868 codegen_->IncreaseFrame(2 * elem_size);
3869
3870 // Load the values to the FP stack in reverse order, using temporaries if needed.
3871 const bool is_wide = !is_float;
3872 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3873 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3874
3875 // Loop doing FPREM until we stabilize.
3876 NearLabel retry;
3877 __ Bind(&retry);
3878 __ fprem();
3879
3880 // Move FP status to AX.
3881 __ fstsw();
3882
3883 // And see if the argument reduction is complete. This is signaled by the
3884 // C2 FPU flag bit set to 0.
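// FPREM computes only a partial remainder (it reduces the exponent
// difference by at most 63 bits per iteration), so the loop may need several
// iterations for operands of very different magnitudes.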
3885 __ andl(EAX, Immediate(kC2ConditionMask));
3886 __ j(kNotEqual, &retry);
3887
3888 // We have settled on the final value. Retrieve it into an XMM register.
3889 // Store FP top of stack to real stack.
3890 if (is_float) {
3891 __ fsts(Address(ESP, 0));
3892 } else {
3893 __ fstl(Address(ESP, 0));
3894 }
3895
3896 // Pop the 2 items from the FP stack.
3897 __ fucompp();
3898
3899 // Load the value from the stack into an XMM register.
3900 DCHECK(out.IsFpuRegister()) << out;
3901 if (is_float) {
3902 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3903 } else {
3904 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3905 }
3906
3907 // And remove the temporary stack space we allocated.
3908 codegen_->DecreaseFrame(2 * elem_size);
3909 }
3910
3911
3912 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3913 DCHECK(instruction->IsDiv() || instruction->IsRem());
3914
3915 LocationSummary* locations = instruction->GetLocations();
3916 DCHECK(locations->InAt(1).IsConstant());
3917 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3918
3919 Register out_register = locations->Out().AsRegister<Register>();
3920 Register input_register = locations->InAt(0).AsRegister<Register>();
3921 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3922
3923 DCHECK(imm == 1 || imm == -1);
3924
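// No division instruction is needed: x % 1 and x % -1 are always 0,
// x / 1 is x, and x / -1 is -x.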
3925 if (instruction->IsRem()) {
3926 __ xorl(out_register, out_register);
3927 } else {
3928 __ movl(out_register, input_register);
3929 if (imm == -1) {
3930 __ negl(out_register);
3931 }
3932 }
3933 }
3934
3935 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3936 LocationSummary* locations = instruction->GetLocations();
3937 Location second = locations->InAt(1);
3938
3939 Register out = locations->Out().AsRegister<Register>();
3940 Register numerator = locations->InAt(0).AsRegister<Register>();
3941
3942 int32_t imm = Int64FromConstant(second.GetConstant());
3943 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3944 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3945
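// out = numerator & (2^k - 1) is already the remainder for non-negative
// inputs. For a negative numerator with a non-zero masked value, subtract
// 2^k (the LEA with ~(2^k - 1), selected by CMOV) so the result takes the
// sign of the dividend, e.g. -7 % 4: (-7 & 3) = 1, 1 - 4 = -3.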
3946 Register tmp = locations->GetTemp(0).AsRegister<Register>();
3947 NearLabel done;
3948 __ movl(out, numerator);
3949 __ andl(out, Immediate(abs_imm-1));
3950 __ j(Condition::kZero, &done);
3951 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3952 __ testl(numerator, numerator);
3953 __ cmovl(Condition::kLess, out, tmp);
3954 __ Bind(&done);
3955 }
3956
3957 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3958 LocationSummary* locations = instruction->GetLocations();
3959
3960 Register out_register = locations->Out().AsRegister<Register>();
3961 Register input_register = locations->InAt(0).AsRegister<Register>();
3962 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3963 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3964 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3965
3966 Register num = locations->GetTemp(0).AsRegister<Register>();
3967
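// An arithmetic shift alone would round toward negative infinity, but Java
// division truncates toward zero. Bias negative inputs by (2^k - 1) before
// shifting (CMOV keeps non-negative inputs unchanged), e.g. -7 / 4:
// (-7 + 3) >> 2 = -1. The final NEG handles negative divisors.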
3968 __ leal(num, Address(input_register, abs_imm - 1));
3969 __ testl(input_register, input_register);
3970 __ cmovl(kGreaterEqual, num, input_register);
3971 int shift = CTZ(imm);
3972 __ sarl(num, Immediate(shift));
3973
3974 if (imm < 0) {
3975 __ negl(num);
3976 }
3977
3978 __ movl(out_register, num);
3979 }
3980
3981 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3982 DCHECK(instruction->IsDiv() || instruction->IsRem());
3983
3984 LocationSummary* locations = instruction->GetLocations();
3985 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3986
3987 Register eax = locations->InAt(0).AsRegister<Register>();
3988 Register out = locations->Out().AsRegister<Register>();
3989 Register num;
3990 Register edx;
3991
3992 if (instruction->IsDiv()) {
3993 edx = locations->GetTemp(0).AsRegister<Register>();
3994 num = locations->GetTemp(1).AsRegister<Register>();
3995 } else {
3996 edx = locations->Out().AsRegister<Register>();
3997 num = locations->GetTemp(0).AsRegister<Register>();
3998 }
3999
4000 DCHECK_EQ(EAX, eax);
4001 DCHECK_EQ(EDX, edx);
4002 if (instruction->IsDiv()) {
4003 DCHECK_EQ(EAX, out);
4004 } else {
4005 DCHECK_EQ(EDX, out);
4006 }
4007
4008 int64_t magic;
4009 int shift;
4010 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4011
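// Constant division is implemented as multiplication by a precomputed
// "magic" reciprocal (the classic Hacker's Delight technique):
//   quotient = (magic * numerator) >> (32 + shift)
// with a numerator correction when magic and imm have opposite signs, and a
// final +1 for negative intermediate results so the quotient truncates
// toward zero (the sign-bit addition after the shift below).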
4012 // Save the numerator.
4013 __ movl(num, eax);
4014
4015 // EAX = magic
4016 __ movl(eax, Immediate(magic));
4017
4018 // EDX:EAX = magic * numerator
4019 __ imull(num);
4020
4021 if (imm > 0 && magic < 0) {
4022 // EDX += num
4023 __ addl(edx, num);
4024 } else if (imm < 0 && magic > 0) {
4025 __ subl(edx, num);
4026 }
4027
4028 // Shift if needed.
4029 if (shift != 0) {
4030 __ sarl(edx, Immediate(shift));
4031 }
4032
4033 // EDX += 1 if EDX < 0
4034 __ movl(eax, edx);
4035 __ shrl(edx, Immediate(31));
4036 __ addl(edx, eax);
4037
4038 if (instruction->IsRem()) {
4039 __ movl(eax, num);
4040 __ imull(edx, Immediate(imm));
4041 __ subl(eax, edx);
4042 __ movl(edx, eax);
4043 } else {
4044 __ movl(eax, edx);
4045 }
4046 }
4047
4048 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4049 DCHECK(instruction->IsDiv() || instruction->IsRem());
4050
4051 LocationSummary* locations = instruction->GetLocations();
4052 Location out = locations->Out();
4053 Location first = locations->InAt(0);
4054 Location second = locations->InAt(1);
4055 bool is_div = instruction->IsDiv();
4056
4057 switch (instruction->GetResultType()) {
4058 case DataType::Type::kInt32: {
4059 DCHECK_EQ(EAX, first.AsRegister<Register>());
4060 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4061
4062 if (second.IsConstant()) {
4063 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4064
4065 if (imm == 0) {
4066 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
4067 } else if (imm == 1 || imm == -1) {
4068 DivRemOneOrMinusOne(instruction);
4069 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4070 if (is_div) {
4071 DivByPowerOfTwo(instruction->AsDiv());
4072 } else {
4073 RemByPowerOfTwo(instruction->AsRem());
4074 }
4075 } else {
4076 DCHECK(imm <= -2 || imm >= 2);
4077 GenerateDivRemWithAnyConstant(instruction);
4078 }
4079 } else {
4080 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4081 instruction, out.AsRegister<Register>(), is_div);
4082 codegen_->AddSlowPath(slow_path);
4083
4084 Register second_reg = second.AsRegister<Register>();
4085 // 0x80000000/-1 triggers an arithmetic exception!
4086 // Dividing by -1 is actually negation and -0x80000000 == 0x80000000, so
4087 // it's safe to just use negl instead of more complex comparisons.
4088
4089 __ cmpl(second_reg, Immediate(-1));
4090 __ j(kEqual, slow_path->GetEntryLabel());
4091
4092 // edx:eax <- sign-extended of eax
4093 __ cdq();
4094 // eax = quotient, edx = remainder
4095 __ idivl(second_reg);
4096 __ Bind(slow_path->GetExitLabel());
4097 }
4098 break;
4099 }
4100
4101 case DataType::Type::kInt64: {
4102 InvokeRuntimeCallingConvention calling_convention;
4103 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4104 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4105 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4106 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4107 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4108 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4109
4110 if (is_div) {
4111 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4112 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4113 } else {
4114 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4115 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4116 }
4117 break;
4118 }
4119
4120 default:
4121 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4122 }
4123 }
4124
4125 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4126 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4127 ? LocationSummary::kCallOnMainOnly
4128 : LocationSummary::kNoCall;
4129 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4130
4131 switch (div->GetResultType()) {
4132 case DataType::Type::kInt32: {
4133 locations->SetInAt(0, Location::RegisterLocation(EAX));
4134 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4135 locations->SetOut(Location::SameAsFirstInput());
4136 // Intel uses edx:eax as the dividend.
4137 locations->AddTemp(Location::RegisterLocation(EDX));
4138 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4139 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
4140 // output and request another temp.
4141 if (div->InputAt(1)->IsIntConstant()) {
4142 locations->AddTemp(Location::RequiresRegister());
4143 }
4144 break;
4145 }
4146 case DataType::Type::kInt64: {
4147 InvokeRuntimeCallingConvention calling_convention;
4148 locations->SetInAt(0, Location::RegisterPairLocation(
4149 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4150 locations->SetInAt(1, Location::RegisterPairLocation(
4151 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4152 // Runtime helper puts the result in EAX, EDX.
4153 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4154 break;
4155 }
4156 case DataType::Type::kFloat32:
4157 case DataType::Type::kFloat64: {
4158 locations->SetInAt(0, Location::RequiresFpuRegister());
4159 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4160 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4161 } else if (div->InputAt(1)->IsConstant()) {
4162 locations->SetInAt(1, Location::RequiresFpuRegister());
4163 } else {
4164 locations->SetInAt(1, Location::Any());
4165 }
4166 locations->SetOut(Location::SameAsFirstInput());
4167 break;
4168 }
4169
4170 default:
4171 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4172 }
4173 }
4174
4175 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4176 LocationSummary* locations = div->GetLocations();
4177 Location first = locations->InAt(0);
4178 Location second = locations->InAt(1);
4179
4180 switch (div->GetResultType()) {
4181 case DataType::Type::kInt32:
4182 case DataType::Type::kInt64: {
4183 GenerateDivRemIntegral(div);
4184 break;
4185 }
4186
4187 case DataType::Type::kFloat32: {
4188 if (second.IsFpuRegister()) {
4189 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4190 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4191 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4192 DCHECK(const_area->IsEmittedAtUseSite());
4193 __ divss(first.AsFpuRegister<XmmRegister>(),
4194 codegen_->LiteralFloatAddress(
4195 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4196 const_area->GetBaseMethodAddress(),
4197 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4198 } else {
4199 DCHECK(second.IsStackSlot());
4200 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4201 }
4202 break;
4203 }
4204
4205 case DataType::Type::kFloat64: {
4206 if (second.IsFpuRegister()) {
4207 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4208 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4209 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4210 DCHECK(const_area->IsEmittedAtUseSite());
4211 __ divsd(first.AsFpuRegister<XmmRegister>(),
4212 codegen_->LiteralDoubleAddress(
4213 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4214 const_area->GetBaseMethodAddress(),
4215 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4216 } else {
4217 DCHECK(second.IsDoubleStackSlot());
4218 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4219 }
4220 break;
4221 }
4222
4223 default:
4224 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4225 }
4226 }
4227
VisitRem(HRem * rem)4228 void LocationsBuilderX86::VisitRem(HRem* rem) {
4229 DataType::Type type = rem->GetResultType();
4230
4231 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4232 ? LocationSummary::kCallOnMainOnly
4233 : LocationSummary::kNoCall;
4234 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4235
4236 switch (type) {
4237 case DataType::Type::kInt32: {
4238 locations->SetInAt(0, Location::RegisterLocation(EAX));
4239 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4240 locations->SetOut(Location::RegisterLocation(EDX));
4241 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4242 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
4243 // output and request another temp.
4244 if (rem->InputAt(1)->IsIntConstant()) {
4245 locations->AddTemp(Location::RequiresRegister());
4246 }
4247 break;
4248 }
4249 case DataType::Type::kInt64: {
4250 InvokeRuntimeCallingConvention calling_convention;
4251 locations->SetInAt(0, Location::RegisterPairLocation(
4252 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4253 locations->SetInAt(1, Location::RegisterPairLocation(
4254 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4255 // Runtime helper puts the result in EAX, EDX.
4256 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4257 break;
4258 }
4259 case DataType::Type::kFloat64:
4260 case DataType::Type::kFloat32: {
4261 locations->SetInAt(0, Location::Any());
4262 locations->SetInAt(1, Location::Any());
4263 locations->SetOut(Location::RequiresFpuRegister());
4264 locations->AddTemp(Location::RegisterLocation(EAX));
4265 break;
4266 }
4267
4268 default:
4269 LOG(FATAL) << "Unexpected rem type " << type;
4270 }
4271 }
4272
VisitRem(HRem * rem)4273 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4274 DataType::Type type = rem->GetResultType();
4275 switch (type) {
4276 case DataType::Type::kInt32:
4277 case DataType::Type::kInt64: {
4278 GenerateDivRemIntegral(rem);
4279 break;
4280 }
4281 case DataType::Type::kFloat32:
4282 case DataType::Type::kFloat64: {
4283 GenerateRemFP(rem);
4284 break;
4285 }
4286 default:
4287 LOG(FATAL) << "Unexpected rem type " << type;
4288 }
4289 }
4290
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4291 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4292 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4293 switch (minmax->GetResultType()) {
4294 case DataType::Type::kInt32:
4295 locations->SetInAt(0, Location::RequiresRegister());
4296 locations->SetInAt(1, Location::RequiresRegister());
4297 locations->SetOut(Location::SameAsFirstInput());
4298 break;
4299 case DataType::Type::kInt64:
4300 locations->SetInAt(0, Location::RequiresRegister());
4301 locations->SetInAt(1, Location::RequiresRegister());
4302 locations->SetOut(Location::SameAsFirstInput());
4303 // Register to use to perform a long subtract to set cc.
4304 locations->AddTemp(Location::RequiresRegister());
4305 break;
4306 case DataType::Type::kFloat32:
4307 locations->SetInAt(0, Location::RequiresFpuRegister());
4308 locations->SetInAt(1, Location::RequiresFpuRegister());
4309 locations->SetOut(Location::SameAsFirstInput());
4310 locations->AddTemp(Location::RequiresRegister());
4311 break;
4312 case DataType::Type::kFloat64:
4313 locations->SetInAt(0, Location::RequiresFpuRegister());
4314 locations->SetInAt(1, Location::RequiresFpuRegister());
4315 locations->SetOut(Location::SameAsFirstInput());
4316 break;
4317 default:
4318 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4319 }
4320 }
4321
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4322 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4323 bool is_min,
4324 DataType::Type type) {
4325 Location op1_loc = locations->InAt(0);
4326 Location op2_loc = locations->InAt(1);
4327
4328 // Shortcut for same input locations.
4329 if (op1_loc.Equals(op2_loc)) {
4330 // Can return immediately, as op1_loc == out_loc.
4331 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4332 // a copy here.
4333 DCHECK(locations->Out().Equals(op1_loc));
4334 return;
4335 }
4336
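// Roughly, the selection below is computed branch-free (illustrative sketch only):
//   bool op1_lt_op2 = (int64) op1 < (int64) op2;            // via cmpl + sbbl on a temp
//   out = (is_min ? !op1_lt_op2 : op1_lt_op2) ? op2 : op1;  // via cmovl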
4337 if (type == DataType::Type::kInt64) {
4338 // Need to perform a subtract to get the sign right.
4339 // op1 is already in the same location as the output.
4340 Location output = locations->Out();
4341 Register output_lo = output.AsRegisterPairLow<Register>();
4342 Register output_hi = output.AsRegisterPairHigh<Register>();
4343
4344 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4345 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4346
4347 // Per the Intel manual's description of CMP: "The comparison is performed by
4348 // subtracting the second operand from the first operand and then setting the
4349 // status flags in the same manner as the SUB instruction."
4350 __ cmpl(output_lo, op2_lo);
4351
4352 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4353 Register temp = locations->GetTemp(0).AsRegister<Register>();
4354 __ movl(temp, output_hi);
4355 __ sbbl(temp, op2_hi);
4356
4357 // Now the condition code is correct.
4358 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4359 __ cmovl(cond, output_lo, op2_lo);
4360 __ cmovl(cond, output_hi, op2_hi);
4361 } else {
4362 DCHECK_EQ(type, DataType::Type::kInt32);
4363 Register out = locations->Out().AsRegister<Register>();
4364 Register op2 = op2_loc.AsRegister<Register>();
4365
4366 // (out := op1)
4367 // out <=? op2
4368 // if out is already the min (resp. max), keep it
4369 // else out := op2
4370 // (done branch-free with a single cmov below)
4371
4372 __ cmpl(out, op2);
4373 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4374 __ cmovl(cond, out, op2);
4375 }
4376 }
4377
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4378 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4379 bool is_min,
4380 DataType::Type type) {
4381 Location op1_loc = locations->InAt(0);
4382 Location op2_loc = locations->InAt(1);
4383 Location out_loc = locations->Out();
4384 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4385
4386 // Shortcut for same input locations.
4387 if (op1_loc.Equals(op2_loc)) {
4388 DCHECK(out_loc.Equals(op1_loc));
4389 return;
4390 }
4391
4392 // (out := op1)
4393 // out <=? op2
4394 // if Nan jmp Nan_label
4395 // if out is min jmp done
4396 // if op2 is min jmp op2_label
4397 // handle -0/+0
4398 // jmp done
4399 // Nan_label:
4400 // out := NaN
4401 // op2_label:
4402 // out := op2
4403 // done:
4404 //
4405 // This removes one jmp, but needs to copy one input (op1) to out.
4406 //
4407 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4408
4409 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4410
4411 NearLabel nan, done, op2_label;
4412 if (type == DataType::Type::kFloat64) {
4413 __ ucomisd(out, op2);
4414 } else {
4415 DCHECK_EQ(type, DataType::Type::kFloat32);
4416 __ ucomiss(out, op2);
4417 }
4418
4419 __ j(Condition::kParityEven, &nan);
4420
4421 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4422 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4423
4424 // Handle 0.0/-0.0.
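// If the operands compared equal here they are either the same value or +0.0 vs -0.0.
// OR-ing the raw bit patterns makes the sign bit stick, so min yields -0.0; AND-ing
// clears it, so max yields +0.0.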
4425 if (is_min) {
4426 if (type == DataType::Type::kFloat64) {
4427 __ orpd(out, op2);
4428 } else {
4429 __ orps(out, op2);
4430 }
4431 } else {
4432 if (type == DataType::Type::kFloat64) {
4433 __ andpd(out, op2);
4434 } else {
4435 __ andps(out, op2);
4436 }
4437 }
4438 __ jmp(&done);
4439
4440 // NaN handling.
4441 __ Bind(&nan);
4442 if (type == DataType::Type::kFloat64) {
4443 // TODO: Use a constant from the constant table (requires extra input).
4444 __ LoadLongConstant(out, kDoubleNaN);
4445 } else {
4446 Register constant = locations->GetTemp(0).AsRegister<Register>();
4447 __ movl(constant, Immediate(kFloatNaN));
4448 __ movd(out, constant);
4449 }
4450 __ jmp(&done);
4451
4452 // out := op2;
4453 __ Bind(&op2_label);
4454 if (type == DataType::Type::kFloat64) {
4455 __ movsd(out, op2);
4456 } else {
4457 __ movss(out, op2);
4458 }
4459
4460 // Done.
4461 __ Bind(&done);
4462 }
4463
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4464 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4465 DataType::Type type = minmax->GetResultType();
4466 switch (type) {
4467 case DataType::Type::kInt32:
4468 case DataType::Type::kInt64:
4469 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4470 break;
4471 case DataType::Type::kFloat32:
4472 case DataType::Type::kFloat64:
4473 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4474 break;
4475 default:
4476 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4477 }
4478 }
4479
VisitMin(HMin * min)4480 void LocationsBuilderX86::VisitMin(HMin* min) {
4481 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4482 }
4483
VisitMin(HMin * min)4484 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4485 GenerateMinMax(min, /*is_min*/ true);
4486 }
4487
VisitMax(HMax * max)4488 void LocationsBuilderX86::VisitMax(HMax* max) {
4489 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4490 }
4491
VisitMax(HMax * max)4492 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4493 GenerateMinMax(max, /*is_min*/ false);
4494 }
4495
VisitAbs(HAbs * abs)4496 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4497 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4498 switch (abs->GetResultType()) {
4499 case DataType::Type::kInt32:
4500 locations->SetInAt(0, Location::RegisterLocation(EAX));
4501 locations->SetOut(Location::SameAsFirstInput());
4502 locations->AddTemp(Location::RegisterLocation(EDX));
4503 break;
4504 case DataType::Type::kInt64:
4505 locations->SetInAt(0, Location::RequiresRegister());
4506 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4507 locations->AddTemp(Location::RequiresRegister());
4508 break;
4509 case DataType::Type::kFloat32:
4510 locations->SetInAt(0, Location::RequiresFpuRegister());
4511 locations->SetOut(Location::SameAsFirstInput());
4512 locations->AddTemp(Location::RequiresFpuRegister());
4513 locations->AddTemp(Location::RequiresRegister());
4514 break;
4515 case DataType::Type::kFloat64:
4516 locations->SetInAt(0, Location::RequiresFpuRegister());
4517 locations->SetOut(Location::SameAsFirstInput());
4518 locations->AddTemp(Location::RequiresFpuRegister());
4519 break;
4520 default:
4521 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4522 }
4523 }
4524
VisitAbs(HAbs * abs)4525 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4526 LocationSummary* locations = abs->GetLocations();
4527 switch (abs->GetResultType()) {
4528 case DataType::Type::kInt32: {
4529 Register out = locations->Out().AsRegister<Register>();
4530 DCHECK_EQ(out, EAX);
4531 Register temp = locations->GetTemp(0).AsRegister<Register>();
4532 DCHECK_EQ(temp, EDX);
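// Branch-free abs: with sign = x >> 31 (all ones when x is negative, zero otherwise),
// abs(x) = (x ^ sign) - sign. E.g. x = -5: sign = 0xFFFFFFFF, x ^ sign = 4, 4 - (-1) = 5.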
4533 // Sign extend EAX into EDX.
4534 __ cdq();
4535 // XOR EAX with sign.
4536 __ xorl(EAX, EDX);
4537 // Subtract out sign to correct.
4538 __ subl(EAX, EDX);
4539 // The result is in EAX.
4540 break;
4541 }
4542 case DataType::Type::kInt64: {
4543 Location input = locations->InAt(0);
4544 Register input_lo = input.AsRegisterPairLow<Register>();
4545 Register input_hi = input.AsRegisterPairHigh<Register>();
4546 Location output = locations->Out();
4547 Register output_lo = output.AsRegisterPairLow<Register>();
4548 Register output_hi = output.AsRegisterPairHigh<Register>();
4549 Register temp = locations->GetTemp(0).AsRegister<Register>();
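// Same (x ^ sign) - sign identity as the 32-bit case, applied to the register pair;
// the borrow from the low-word subtraction is propagated into the high word with sbbl.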
4550 // Compute the sign into the temporary.
4551 __ movl(temp, input_hi);
4552 __ sarl(temp, Immediate(31));
4553 // Store the sign into the output.
4554 __ movl(output_lo, temp);
4555 __ movl(output_hi, temp);
4556 // XOR the input to the output.
4557 __ xorl(output_lo, input_lo);
4558 __ xorl(output_hi, input_hi);
4559 // Subtract the sign.
4560 __ subl(output_lo, temp);
4561 __ sbbl(output_hi, temp);
4562 break;
4563 }
4564 case DataType::Type::kFloat32: {
4565 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4566 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4567 Register constant = locations->GetTemp(1).AsRegister<Register>();
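// Clearing the IEEE-754 sign bit with the 0x7FFFFFFF mask yields |x| for every input,
// including NaNs and infinities.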
4568 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4569 __ movd(temp, constant);
4570 __ andps(out, temp);
4571 break;
4572 }
4573 case DataType::Type::kFloat64: {
4574 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4575 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4576 // TODO: Use a constant from the constant table (requires extra input).
4577 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4578 __ andpd(out, temp);
4579 break;
4580 }
4581 default:
4582 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4583 }
4584 }
4585
VisitDivZeroCheck(HDivZeroCheck * instruction)4586 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4587 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4588 switch (instruction->GetType()) {
4589 case DataType::Type::kBool:
4590 case DataType::Type::kUint8:
4591 case DataType::Type::kInt8:
4592 case DataType::Type::kUint16:
4593 case DataType::Type::kInt16:
4594 case DataType::Type::kInt32: {
4595 locations->SetInAt(0, Location::Any());
4596 break;
4597 }
4598 case DataType::Type::kInt64: {
4599 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4600 if (!instruction->IsConstant()) {
4601 locations->AddTemp(Location::RequiresRegister());
4602 }
4603 break;
4604 }
4605 default:
4606 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4607 }
4608 }
4609
VisitDivZeroCheck(HDivZeroCheck * instruction)4610 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4611 SlowPathCode* slow_path =
4612 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4613 codegen_->AddSlowPath(slow_path);
4614
4615 LocationSummary* locations = instruction->GetLocations();
4616 Location value = locations->InAt(0);
4617
4618 switch (instruction->GetType()) {
4619 case DataType::Type::kBool:
4620 case DataType::Type::kUint8:
4621 case DataType::Type::kInt8:
4622 case DataType::Type::kUint16:
4623 case DataType::Type::kInt16:
4624 case DataType::Type::kInt32: {
4625 if (value.IsRegister()) {
4626 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4627 __ j(kEqual, slow_path->GetEntryLabel());
4628 } else if (value.IsStackSlot()) {
4629 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4630 __ j(kEqual, slow_path->GetEntryLabel());
4631 } else {
4632 DCHECK(value.IsConstant()) << value;
4633 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4634 __ jmp(slow_path->GetEntryLabel());
4635 }
4636 }
4637 break;
4638 }
4639 case DataType::Type::kInt64: {
4640 if (value.IsRegisterPair()) {
4641 Register temp = locations->GetTemp(0).AsRegister<Register>();
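// A 64-bit value is zero iff (low | high) == 0, so a single orl into the temp sets ZF
// for the test without clobbering the dividend pair.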
4642 __ movl(temp, value.AsRegisterPairLow<Register>());
4643 __ orl(temp, value.AsRegisterPairHigh<Register>());
4644 __ j(kEqual, slow_path->GetEntryLabel());
4645 } else {
4646 DCHECK(value.IsConstant()) << value;
4647 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4648 __ jmp(slow_path->GetEntryLabel());
4649 }
4650 }
4651 break;
4652 }
4653 default:
4654 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4655 }
4656 }
4657
HandleShift(HBinaryOperation * op)4658 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4659 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4660
4661 LocationSummary* locations =
4662 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4663
4664 switch (op->GetResultType()) {
4665 case DataType::Type::kInt32:
4666 case DataType::Type::kInt64: {
4667 // Can't have Location::Any() and output SameAsFirstInput()
4668 locations->SetInAt(0, Location::RequiresRegister());
4669 // The shift count needs to be in CL or a constant.
4670 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4671 locations->SetOut(Location::SameAsFirstInput());
4672 break;
4673 }
4674 default:
4675 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4676 }
4677 }
4678
HandleShift(HBinaryOperation * op)4679 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4680 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4681
4682 LocationSummary* locations = op->GetLocations();
4683 Location first = locations->InAt(0);
4684 Location second = locations->InAt(1);
4685 DCHECK(first.Equals(locations->Out()));
4686
4687 switch (op->GetResultType()) {
4688 case DataType::Type::kInt32: {
4689 DCHECK(first.IsRegister());
4690 Register first_reg = first.AsRegister<Register>();
4691 if (second.IsRegister()) {
4692 Register second_reg = second.AsRegister<Register>();
4693 DCHECK_EQ(ECX, second_reg);
4694 if (op->IsShl()) {
4695 __ shll(first_reg, second_reg);
4696 } else if (op->IsShr()) {
4697 __ sarl(first_reg, second_reg);
4698 } else {
4699 __ shrl(first_reg, second_reg);
4700 }
4701 } else {
4702 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4703 if (shift == 0) {
4704 return;
4705 }
4706 Immediate imm(shift);
4707 if (op->IsShl()) {
4708 __ shll(first_reg, imm);
4709 } else if (op->IsShr()) {
4710 __ sarl(first_reg, imm);
4711 } else {
4712 __ shrl(first_reg, imm);
4713 }
4714 }
4715 break;
4716 }
4717 case DataType::Type::kInt64: {
4718 if (second.IsRegister()) {
4719 Register second_reg = second.AsRegister<Register>();
4720 DCHECK_EQ(ECX, second_reg);
4721 if (op->IsShl()) {
4722 GenerateShlLong(first, second_reg);
4723 } else if (op->IsShr()) {
4724 GenerateShrLong(first, second_reg);
4725 } else {
4726 GenerateUShrLong(first, second_reg);
4727 }
4728 } else {
4729 // Shift by a constant.
4730 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4731 // Nothing to do if the shift is 0, as the input is already the output.
4732 if (shift != 0) {
4733 if (op->IsShl()) {
4734 GenerateShlLong(first, shift);
4735 } else if (op->IsShr()) {
4736 GenerateShrLong(first, shift);
4737 } else {
4738 GenerateUShrLong(first, shift);
4739 }
4740 }
4741 }
4742 break;
4743 }
4744 default:
4745 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4746 }
4747 }
4748
GenerateShlLong(const Location & loc,int shift)4749 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4750 Register low = loc.AsRegisterPairLow<Register>();
4751 Register high = loc.AsRegisterPairHigh<Register>();
4752 if (shift == 1) {
4753 // This is just an addition.
4754 __ addl(low, low);
4755 __ adcl(high, high);
4756 } else if (shift == 32) {
4757 // Shift by 32 is easy. High gets low, and low gets 0.
4758 codegen_->EmitParallelMoves(
4759 loc.ToLow(),
4760 loc.ToHigh(),
4761 DataType::Type::kInt32,
4762 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4763 loc.ToLow(),
4764 DataType::Type::kInt32);
4765 } else if (shift > 32) {
4766 // Low part becomes 0. High part is low part << (shift-32).
4767 __ movl(high, low);
4768 __ shll(high, Immediate(shift - 32));
4769 __ xorl(low, low);
4770 } else {
4771 // Between 1 and 31.
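// shld shifts `high` left by the immediate and fills the vacated low-order bits with the
// top bits of `low`; together with shll on `low` this forms the 64-bit shift.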
4772 __ shld(high, low, Immediate(shift));
4773 __ shll(low, Immediate(shift));
4774 }
4775 }
4776
GenerateShlLong(const Location & loc,Register shifter)4777 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4778 NearLabel done;
4779 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4780 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
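// The hardware shift count is taken modulo 32, so for counts 32-63 the shld/shll above
// leave the result off by one word. Bit 5 of the count distinguishes the two cases;
// patch up by moving the low word into the high word and zeroing the low word.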
4781 __ testl(shifter, Immediate(32));
4782 __ j(kEqual, &done);
4783 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4784 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4785 __ Bind(&done);
4786 }
4787
GenerateShrLong(const Location & loc,int shift)4788 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4789 Register low = loc.AsRegisterPairLow<Register>();
4790 Register high = loc.AsRegisterPairHigh<Register>();
4791 if (shift == 32) {
4792 // Need to copy the sign.
4793 DCHECK_NE(low, high);
4794 __ movl(low, high);
4795 __ sarl(high, Immediate(31));
4796 } else if (shift > 32) {
4797 DCHECK_NE(low, high);
4798 // High part becomes sign. Low part is shifted by shift - 32.
4799 __ movl(low, high);
4800 __ sarl(high, Immediate(31));
4801 __ sarl(low, Immediate(shift - 32));
4802 } else {
4803 // Between 1 and 31.
4804 __ shrd(low, high, Immediate(shift));
4805 __ sarl(high, Immediate(shift));
4806 }
4807 }
4808
GenerateShrLong(const Location & loc,Register shifter)4809 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4810 NearLabel done;
4811 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4812 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4813 __ testl(shifter, Immediate(32));
4814 __ j(kEqual, &done);
4815 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4816 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4817 __ Bind(&done);
4818 }
4819
GenerateUShrLong(const Location & loc,int shift)4820 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4821 Register low = loc.AsRegisterPairLow<Register>();
4822 Register high = loc.AsRegisterPairHigh<Register>();
4823 if (shift == 32) {
4824 // Shift by 32 is easy. Low gets high, and high gets 0.
4825 codegen_->EmitParallelMoves(
4826 loc.ToHigh(),
4827 loc.ToLow(),
4828 DataType::Type::kInt32,
4829 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4830 loc.ToHigh(),
4831 DataType::Type::kInt32);
4832 } else if (shift > 32) {
4833 // Low part is high >> (shift - 32). High part becomes 0.
4834 __ movl(low, high);
4835 __ shrl(low, Immediate(shift - 32));
4836 __ xorl(high, high);
4837 } else {
4838 // Between 1 and 31.
4839 __ shrd(low, high, Immediate(shift));
4840 __ shrl(high, Immediate(shift));
4841 }
4842 }
4843
GenerateUShrLong(const Location & loc,Register shifter)4844 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4845 NearLabel done;
4846 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4847 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4848 __ testl(shifter, Immediate(32));
4849 __ j(kEqual, &done);
4850 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4851 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4852 __ Bind(&done);
4853 }
4854
VisitRor(HRor * ror)4855 void LocationsBuilderX86::VisitRor(HRor* ror) {
4856 LocationSummary* locations =
4857 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4858
4859 switch (ror->GetResultType()) {
4860 case DataType::Type::kInt64:
4861 // Add the temporary needed.
4862 locations->AddTemp(Location::RequiresRegister());
4863 FALLTHROUGH_INTENDED;
4864 case DataType::Type::kInt32:
4865 locations->SetInAt(0, Location::RequiresRegister());
4866 // The shift count needs to be in CL (unless it is a constant).
4867 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4868 locations->SetOut(Location::SameAsFirstInput());
4869 break;
4870 default:
4871 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4872 UNREACHABLE();
4873 }
4874 }
4875
VisitRor(HRor * ror)4876 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4877 LocationSummary* locations = ror->GetLocations();
4878 Location first = locations->InAt(0);
4879 Location second = locations->InAt(1);
4880
4881 if (ror->GetResultType() == DataType::Type::kInt32) {
4882 Register first_reg = first.AsRegister<Register>();
4883 if (second.IsRegister()) {
4884 Register second_reg = second.AsRegister<Register>();
4885 __ rorl(first_reg, second_reg);
4886 } else {
4887 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4888 __ rorl(first_reg, imm);
4889 }
4890 return;
4891 }
4892
4893 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4894 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4895 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4896 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4897 if (second.IsRegister()) {
4898 Register second_reg = second.AsRegister<Register>();
4899 DCHECK_EQ(second_reg, ECX);
4900 __ movl(temp_reg, first_reg_hi);
4901 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4902 __ shrd(first_reg_lo, temp_reg, second_reg);
4903 __ movl(temp_reg, first_reg_hi);
4904 __ testl(second_reg, Immediate(32));
4905 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4906 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4907 } else {
4908 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4909 if (shift_amt == 0) {
4910 // Already fine.
4911 return;
4912 }
4913 if (shift_amt == 32) {
4914 // Just swap.
4915 __ movl(temp_reg, first_reg_lo);
4916 __ movl(first_reg_lo, first_reg_hi);
4917 __ movl(first_reg_hi, temp_reg);
4918 return;
4919 }
4920
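// General case: a 64-bit rotate right by n is two shrd's, each half refilled with bits
// from the other half (using the saved copy of the low word). Since shrd masks the count
// to 5 bits, an n greater than 32 additionally requires swapping the halves afterwards.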
4921 Immediate imm(shift_amt);
4922 // Save the contents of the low word.
4923 __ movl(temp_reg, first_reg_lo);
4924
4925 // Shift right into low, feeding bits from high.
4926 __ shrd(first_reg_lo, first_reg_hi, imm);
4927
4928 // Shift right into high, feeding bits from the original low.
4929 __ shrd(first_reg_hi, temp_reg, imm);
4930
4931 // Swap if needed.
4932 if (shift_amt > 32) {
4933 __ movl(temp_reg, first_reg_lo);
4934 __ movl(first_reg_lo, first_reg_hi);
4935 __ movl(first_reg_hi, temp_reg);
4936 }
4937 }
4938 }
4939
VisitShl(HShl * shl)4940 void LocationsBuilderX86::VisitShl(HShl* shl) {
4941 HandleShift(shl);
4942 }
4943
VisitShl(HShl * shl)4944 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4945 HandleShift(shl);
4946 }
4947
VisitShr(HShr * shr)4948 void LocationsBuilderX86::VisitShr(HShr* shr) {
4949 HandleShift(shr);
4950 }
4951
VisitShr(HShr * shr)4952 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4953 HandleShift(shr);
4954 }
4955
VisitUShr(HUShr * ushr)4956 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4957 HandleShift(ushr);
4958 }
4959
VisitUShr(HUShr * ushr)4960 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4961 HandleShift(ushr);
4962 }
4963
VisitNewInstance(HNewInstance * instruction)4964 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4965 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4966 instruction, LocationSummary::kCallOnMainOnly);
4967 locations->SetOut(Location::RegisterLocation(EAX));
4968 InvokeRuntimeCallingConvention calling_convention;
4969 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4970 }
4971
VisitNewInstance(HNewInstance * instruction)4972 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4973 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4974 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4975 DCHECK(!codegen_->IsLeafMethod());
4976 }
4977
VisitNewArray(HNewArray * instruction)4978 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4979 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4980 instruction, LocationSummary::kCallOnMainOnly);
4981 locations->SetOut(Location::RegisterLocation(EAX));
4982 InvokeRuntimeCallingConvention calling_convention;
4983 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4984 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4985 }
4986
VisitNewArray(HNewArray * instruction)4987 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4988 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4989 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4990 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4991 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4992 DCHECK(!codegen_->IsLeafMethod());
4993 }
4994
VisitParameterValue(HParameterValue * instruction)4995 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4996 LocationSummary* locations =
4997 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4998 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4999 if (location.IsStackSlot()) {
5000 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5001 } else if (location.IsDoubleStackSlot()) {
5002 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5003 }
5004 locations->SetOut(location);
5005 }
5006
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)5007 void InstructionCodeGeneratorX86::VisitParameterValue(
5008 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5009 }
5010
VisitCurrentMethod(HCurrentMethod * instruction)5011 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5012 LocationSummary* locations =
5013 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5014 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5015 }
5016
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)5017 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5018 }
5019
VisitClassTableGet(HClassTableGet * instruction)5020 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5021 LocationSummary* locations =
5022 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5023 locations->SetInAt(0, Location::RequiresRegister());
5024 locations->SetOut(Location::RequiresRegister());
5025 }
5026
VisitClassTableGet(HClassTableGet * instruction)5027 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5028 LocationSummary* locations = instruction->GetLocations();
5029 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5030 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5031 instruction->GetIndex(), kX86PointerSize).SizeValue();
5032 __ movl(locations->Out().AsRegister<Register>(),
5033 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5034 } else {
5035 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5036 instruction->GetIndex(), kX86PointerSize));
5037 __ movl(locations->Out().AsRegister<Register>(),
5038 Address(locations->InAt(0).AsRegister<Register>(),
5039 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5040 // out = out->GetImtEntryAt(method_offset);
5041 __ movl(locations->Out().AsRegister<Register>(),
5042 Address(locations->Out().AsRegister<Register>(), method_offset));
5043 }
5044 }
5045
VisitNot(HNot * not_)5046 void LocationsBuilderX86::VisitNot(HNot* not_) {
5047 LocationSummary* locations =
5048 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5049 locations->SetInAt(0, Location::RequiresRegister());
5050 locations->SetOut(Location::SameAsFirstInput());
5051 }
5052
VisitNot(HNot * not_)5053 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5054 LocationSummary* locations = not_->GetLocations();
5055 Location in = locations->InAt(0);
5056 Location out = locations->Out();
5057 DCHECK(in.Equals(out));
5058 switch (not_->GetResultType()) {
5059 case DataType::Type::kInt32:
5060 __ notl(out.AsRegister<Register>());
5061 break;
5062
5063 case DataType::Type::kInt64:
5064 __ notl(out.AsRegisterPairLow<Register>());
5065 __ notl(out.AsRegisterPairHigh<Register>());
5066 break;
5067
5068 default:
5069 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5070 }
5071 }
5072
VisitBooleanNot(HBooleanNot * bool_not)5073 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5074 LocationSummary* locations =
5075 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5076 locations->SetInAt(0, Location::RequiresRegister());
5077 locations->SetOut(Location::SameAsFirstInput());
5078 }
5079
VisitBooleanNot(HBooleanNot * bool_not)5080 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5081 LocationSummary* locations = bool_not->GetLocations();
5082 Location in = locations->InAt(0);
5083 Location out = locations->Out();
5084 DCHECK(in.Equals(out));
5085 __ xorl(out.AsRegister<Register>(), Immediate(1));
5086 }
5087
VisitCompare(HCompare * compare)5088 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5089 LocationSummary* locations =
5090 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5091 switch (compare->InputAt(0)->GetType()) {
5092 case DataType::Type::kBool:
5093 case DataType::Type::kUint8:
5094 case DataType::Type::kInt8:
5095 case DataType::Type::kUint16:
5096 case DataType::Type::kInt16:
5097 case DataType::Type::kInt32:
5098 case DataType::Type::kInt64: {
5099 locations->SetInAt(0, Location::RequiresRegister());
5100 locations->SetInAt(1, Location::Any());
5101 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5102 break;
5103 }
5104 case DataType::Type::kFloat32:
5105 case DataType::Type::kFloat64: {
5106 locations->SetInAt(0, Location::RequiresFpuRegister());
5107 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5108 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5109 } else if (compare->InputAt(1)->IsConstant()) {
5110 locations->SetInAt(1, Location::RequiresFpuRegister());
5111 } else {
5112 locations->SetInAt(1, Location::Any());
5113 }
5114 locations->SetOut(Location::RequiresRegister());
5115 break;
5116 }
5117 default:
5118 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5119 }
5120 }
5121
VisitCompare(HCompare * compare)5122 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5123 LocationSummary* locations = compare->GetLocations();
5124 Register out = locations->Out().AsRegister<Register>();
5125 Location left = locations->InAt(0);
5126 Location right = locations->InAt(1);
5127
5128 NearLabel less, greater, done;
5129 Condition less_cond = kLess;
5130
5131 switch (compare->InputAt(0)->GetType()) {
5132 case DataType::Type::kBool:
5133 case DataType::Type::kUint8:
5134 case DataType::Type::kInt8:
5135 case DataType::Type::kUint16:
5136 case DataType::Type::kInt16:
5137 case DataType::Type::kInt32: {
5138 codegen_->GenerateIntCompare(left, right);
5139 break;
5140 }
5141 case DataType::Type::kInt64: {
5142 Register left_low = left.AsRegisterPairLow<Register>();
5143 Register left_high = left.AsRegisterPairHigh<Register>();
5144 int32_t val_low = 0;
5145 int32_t val_high = 0;
5146 bool right_is_const = false;
5147
5148 if (right.IsConstant()) {
5149 DCHECK(right.GetConstant()->IsLongConstant());
5150 right_is_const = true;
5151 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5152 val_low = Low32Bits(val);
5153 val_high = High32Bits(val);
5154 }
5155
5156 if (right.IsRegisterPair()) {
5157 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5158 } else if (right.IsDoubleStackSlot()) {
5159 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5160 } else {
5161 DCHECK(right_is_const) << right;
5162 codegen_->Compare32BitValue(left_high, val_high);
5163 }
5164 __ j(kLess, &less); // Signed compare.
5165 __ j(kGreater, &greater); // Signed compare.
5166 if (right.IsRegisterPair()) {
5167 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5168 } else if (right.IsDoubleStackSlot()) {
5169 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5170 } else {
5171 DCHECK(right_is_const) << right;
5172 codegen_->Compare32BitValue(left_low, val_low);
5173 }
5174 less_cond = kBelow; // for CF (unsigned).
5175 break;
5176 }
5177 case DataType::Type::kFloat32: {
5178 GenerateFPCompare(left, right, compare, false);
5179 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5180 less_cond = kBelow; // for CF (floats).
5181 break;
5182 }
5183 case DataType::Type::kFloat64: {
5184 GenerateFPCompare(left, right, compare, true);
5185 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5186 less_cond = kBelow; // for CF (floats).
5187 break;
5188 }
5189 default:
5190 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5191 }
5192
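// Materialize the three-way result: start with 0 and keep it if the flags say equal;
// branch to the -1 block on less_cond (signed kLess for 32-bit values, unsigned kBelow
// after the low-word compare of longs and after the FP compares); fall through to +1.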
5193 __ movl(out, Immediate(0));
5194 __ j(kEqual, &done);
5195 __ j(less_cond, &less);
5196
5197 __ Bind(&greater);
5198 __ movl(out, Immediate(1));
5199 __ jmp(&done);
5200
5201 __ Bind(&less);
5202 __ movl(out, Immediate(-1));
5203
5204 __ Bind(&done);
5205 }
5206
VisitPhi(HPhi * instruction)5207 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5208 LocationSummary* locations =
5209 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5210 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5211 locations->SetInAt(i, Location::Any());
5212 }
5213 locations->SetOut(Location::Any());
5214 }
5215
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)5216 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5217 LOG(FATAL) << "Unreachable";
5218 }
5219
GenerateMemoryBarrier(MemBarrierKind kind)5220 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5221 /*
5222 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
5223 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5224 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5225 */
5226 switch (kind) {
5227 case MemBarrierKind::kAnyAny: {
5228 MemoryFence();
5229 break;
5230 }
5231 case MemBarrierKind::kAnyStore:
5232 case MemBarrierKind::kLoadAny:
5233 case MemBarrierKind::kStoreStore: {
5234 // nop
5235 break;
5236 }
5237 case MemBarrierKind::kNTStoreStore:
5238 // Non-Temporal Store/Store needs an explicit fence.
5239 MemoryFence(/* non-temporal= */ true);
5240 break;
5241 }
5242 }
5243
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method ATTRIBUTE_UNUSED)5244 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5245 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5246 ArtMethod* method ATTRIBUTE_UNUSED) {
5247 return desired_dispatch_info;
5248 }
5249
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5250 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5251 if (invoke->IsInvokeStaticOrDirect()) {
5252 return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5253 }
5254 DCHECK(invoke->IsInvokeInterface());
5255 Location location =
5256 invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5257 return location.AsRegister<Register>();
5258 }
5259
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5260 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5261 Register temp) {
5262 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5263 if (!invoke->GetLocations()->Intrinsified()) {
5264 return location.AsRegister<Register>();
5265 }
5266 // For intrinsics we allow any location, so it may be on the stack.
5267 if (!location.IsRegister()) {
5268 __ movl(temp, Address(ESP, location.GetStackIndex()));
5269 return temp;
5270 }
5271 // For register locations, check if the register was saved. If so, get it from the stack.
5272 // Note: There is a chance that the register was saved but not overwritten, so we could
5273 // save one load. However, since this is just an intrinsic slow path we prefer this
5274 // simple and more robust approach rather than trying to determine if that's the case.
5275 SlowPathCode* slow_path = GetCurrentSlowPath();
5276 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5277 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5278 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5279 __ movl(temp, Address(ESP, stack_offset));
5280 return temp;
5281 }
5282 return location.AsRegister<Register>();
5283 }
5284
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5285 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5286 switch (load_kind) {
5287 case MethodLoadKind::kBootImageLinkTimePcRelative: {
5288 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5289 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5290 __ leal(temp.AsRegister<Register>(),
5291 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5292 RecordBootImageMethodPatch(invoke);
5293 break;
5294 }
5295 case MethodLoadKind::kBootImageRelRo: {
5296 size_t index = invoke->IsInvokeInterface()
5297 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5298 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5299 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5300 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5301 RecordBootImageRelRoPatch(
5302 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5303 GetBootImageOffset(invoke));
5304 break;
5305 }
5306 case MethodLoadKind::kBssEntry: {
5307 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5308 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5309 RecordMethodBssEntryPatch(invoke);
5310 // No need for memory fence, thanks to the x86 memory model.
5311 break;
5312 }
5313 case MethodLoadKind::kJitDirectAddress: {
5314 __ movl(temp.AsRegister<Register>(),
5315 Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5316 break;
5317 }
5318 case MethodLoadKind::kRuntimeCall: {
5319 // Test situation, don't do anything.
5320 break;
5321 }
5322 default: {
5323 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5324 UNREACHABLE();
5325 }
5326 }
5327 }
5328
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5329 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5330 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5331 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5332 switch (invoke->GetMethodLoadKind()) {
5333 case MethodLoadKind::kStringInit: {
5334 // temp = thread->string_init_entrypoint
5335 uint32_t offset =
5336 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5337 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5338 break;
5339 }
5340 case MethodLoadKind::kRecursive: {
5341 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5342 break;
5343 }
5344 case MethodLoadKind::kRuntimeCall: {
5345 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5346 return; // No code pointer retrieval; the runtime performs the call directly.
5347 }
5348 case MethodLoadKind::kBootImageLinkTimePcRelative:
5349 // For kCallCriticalNative we skip loading the method and do the call directly.
5350 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5351 break;
5352 }
5353 FALLTHROUGH_INTENDED;
5354 default: {
5355 LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5356 }
5357 }
5358
5359 switch (invoke->GetCodePtrLocation()) {
5360 case CodePtrLocation::kCallSelf:
5361 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5362 __ call(GetFrameEntryLabel());
5363 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5364 break;
5365 case CodePtrLocation::kCallCriticalNative: {
5366 size_t out_frame_size =
5367 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5368 kNativeStackAlignment,
5369 GetCriticalNativeDirectCallFrameSize>(invoke);
5370 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5371 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5372 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5373 __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5374 RecordBootImageJniEntrypointPatch(invoke);
5375 } else {
5376 // (callee_method + offset_of_jni_entry_point)()
5377 __ call(Address(callee_method.AsRegister<Register>(),
5378 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5379 }
5380 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5381 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5382 // Create space for conversion.
5383 out_frame_size = 8u;
5384 IncreaseFrame(out_frame_size);
5385 }
5386 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5387 switch (invoke->GetType()) {
5388 case DataType::Type::kBool:
5389 __ movzxb(EAX, AL);
5390 break;
5391 case DataType::Type::kInt8:
5392 __ movsxb(EAX, AL);
5393 break;
5394 case DataType::Type::kUint16:
5395 __ movzxw(EAX, EAX);
5396 break;
5397 case DataType::Type::kInt16:
5398 __ movsxw(EAX, EAX);
5399 break;
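// Native x86 code returns float/double on the x87 stack (ST0), while the managed ABI
// expects the value in XMM0, so bounce it through the stack slot reserved above.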
5400 case DataType::Type::kFloat32:
5401 __ fstps(Address(ESP, 0));
5402 __ movss(XMM0, Address(ESP, 0));
5403 break;
5404 case DataType::Type::kFloat64:
5405 __ fstpl(Address(ESP, 0));
5406 __ movsd(XMM0, Address(ESP, 0));
5407 break;
5408 case DataType::Type::kInt32:
5409 case DataType::Type::kInt64:
5410 case DataType::Type::kVoid:
5411 break;
5412 default:
5413 DCHECK(false) << invoke->GetType();
5414 break;
5415 }
5416 if (out_frame_size != 0u) {
5417 DecreaseFrame(out_frame_size);
5418 }
5419 break;
5420 }
5421 case CodePtrLocation::kCallArtMethod:
5422 // (callee_method + offset_of_quick_compiled_code)()
5423 __ call(Address(callee_method.AsRegister<Register>(),
5424 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5425 kX86PointerSize).Int32Value()));
5426 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5427 break;
5428 }
5429
5430 DCHECK(!IsLeafMethod());
5431 }
5432
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5433 void CodeGeneratorX86::GenerateVirtualCall(
5434 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5435 Register temp = temp_in.AsRegister<Register>();
5436 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5437 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5438
5439 // Use the calling convention instead of the location of the receiver, as
5440 // intrinsics may have put the receiver in a different register. In the intrinsics
5441 // slow path, the arguments have been moved to the right place, so here we are
5442 // guaranteed that the receiver is the first register of the calling convention.
5443 InvokeDexCallingConvention calling_convention;
5444 Register receiver = calling_convention.GetRegisterAt(0);
5445 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5446 // /* HeapReference<Class> */ temp = receiver->klass_
5447 __ movl(temp, Address(receiver, class_offset));
5448 MaybeRecordImplicitNullCheck(invoke);
5449 // Instead of simply (possibly) unpoisoning `temp` here, we should
5450 // emit a read barrier for the previous class reference load.
5451 // However this is not required in practice, as this is an
5452 // intermediate/temporary reference and because the current
5453 // concurrent copying collector keeps the from-space memory
5454 // intact/accessible until the end of the marking phase (future
5455 // collectors may not guarantee this).
5456 __ MaybeUnpoisonHeapReference(temp);
5457
5458 MaybeGenerateInlineCacheCheck(invoke, temp);
5459
5460 // temp = temp->GetMethodAt(method_offset);
5461 __ movl(temp, Address(temp, method_offset));
5462 // call temp->GetEntryPoint();
5463 __ call(Address(
5464 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5465 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5466 }
5467
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5468 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5469 uint32_t intrinsic_data) {
5470 boot_image_other_patches_.emplace_back(
5471 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5472 __ Bind(&boot_image_other_patches_.back().label);
5473 }
5474
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5475 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5476 uint32_t boot_image_offset) {
5477 boot_image_other_patches_.emplace_back(
5478 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5479 __ Bind(&boot_image_other_patches_.back().label);
5480 }
5481
RecordBootImageMethodPatch(HInvoke * invoke)5482 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5483 size_t index = invoke->IsInvokeInterface()
5484 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5485 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5486 HX86ComputeBaseMethodAddress* method_address =
5487 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5488 boot_image_method_patches_.emplace_back(
5489 method_address,
5490 invoke->GetResolvedMethodReference().dex_file,
5491 invoke->GetResolvedMethodReference().index);
5492 __ Bind(&boot_image_method_patches_.back().label);
5493 }
5494
RecordMethodBssEntryPatch(HInvoke * invoke)5495 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5496 size_t index = invoke->IsInvokeInterface()
5497 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5498 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5499 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5500 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5501 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5502 invoke->GetMethodReference().dex_file));
5503 HX86ComputeBaseMethodAddress* method_address =
5504 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5505 // Add the patch entry and bind its label at the end of the instruction.
5506 method_bss_entry_patches_.emplace_back(
5507 method_address,
5508 invoke->GetMethodReference().dex_file,
5509 invoke->GetMethodReference().index);
5510 __ Bind(&method_bss_entry_patches_.back().label);
5511 }
5512
RecordBootImageTypePatch(HLoadClass * load_class)5513 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5514 HX86ComputeBaseMethodAddress* method_address =
5515 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5516 boot_image_type_patches_.emplace_back(
5517 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5518 __ Bind(&boot_image_type_patches_.back().label);
5519 }
5520
NewTypeBssEntryPatch(HLoadClass * load_class)5521 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5522 HX86ComputeBaseMethodAddress* method_address =
5523 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5524 ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5525 switch (load_class->GetLoadKind()) {
5526 case HLoadClass::LoadKind::kBssEntry:
5527 patches = &type_bss_entry_patches_;
5528 break;
5529 case HLoadClass::LoadKind::kBssEntryPublic:
5530 patches = &public_type_bss_entry_patches_;
5531 break;
5532 case HLoadClass::LoadKind::kBssEntryPackage:
5533 patches = &package_type_bss_entry_patches_;
5534 break;
5535 default:
5536 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5537 UNREACHABLE();
5538 }
5539 patches->emplace_back(
5540 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5541 return &patches->back().label;
5542 }
5543
RecordBootImageStringPatch(HLoadString * load_string)5544 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5545 HX86ComputeBaseMethodAddress* method_address =
5546 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5547 boot_image_string_patches_.emplace_back(
5548 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5549 __ Bind(&boot_image_string_patches_.back().label);
5550 }
5551
NewStringBssEntryPatch(HLoadString * load_string)5552 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5553 HX86ComputeBaseMethodAddress* method_address =
5554 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5555 string_bss_entry_patches_.emplace_back(
5556 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5557 return &string_bss_entry_patches_.back().label;
5558 }
5559
RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect * invoke)5560 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5561 HX86ComputeBaseMethodAddress* method_address =
5562 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5563 boot_image_jni_entrypoint_patches_.emplace_back(
5564 method_address,
5565 invoke->GetResolvedMethodReference().dex_file,
5566 invoke->GetResolvedMethodReference().index);
5567 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5568 }
5569
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5570 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5571 uint32_t boot_image_reference,
5572 HInvokeStaticOrDirect* invoke) {
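// Three cases: when producing the boot image (or an extension) the address becomes a
// link-time patched leal; for PIC AOT app code it is loaded from a boot image
// .data.bimg.rel.ro entry patched with the boot image offset; for JIT the boot image is
// already mapped, so the absolute address can be embedded directly.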
5573 if (GetCompilerOptions().IsBootImage()) {
5574 HX86ComputeBaseMethodAddress* method_address =
5575 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5576 DCHECK(method_address != nullptr);
5577 Register method_address_reg =
5578 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5579 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5580 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5581 } else if (GetCompilerOptions().GetCompilePic()) {
5582 HX86ComputeBaseMethodAddress* method_address =
5583 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5584 DCHECK(method_address != nullptr);
5585 Register method_address_reg =
5586 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5587 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5588 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5589 } else {
5590 DCHECK(GetCompilerOptions().IsJitCompiler());
5591 gc::Heap* heap = Runtime::Current()->GetHeap();
5592 DCHECK(!heap->GetBootImageSpaces().empty());
5593 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5594 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5595 }
5596 }
5597
LoadIntrinsicDeclaringClass(Register reg,HInvokeStaticOrDirect * invoke)5598 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5599 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5600 if (GetCompilerOptions().IsBootImage()) {
5601 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5602 HX86ComputeBaseMethodAddress* method_address =
5603 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5604 DCHECK(method_address != nullptr);
5605 Register method_address_reg =
5606 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5607 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5608 MethodReference target_method = invoke->GetResolvedMethodReference();
5609 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5610 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5611 __ Bind(&boot_image_type_patches_.back().label);
5612 } else {
5613 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5614 LoadBootImageAddress(reg, boot_image_offset, invoke);
5615 }
5616 }
5617
5618 // The label points to the end of the "movl" (or another) instruction, but the literal offset
5619 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
5620 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
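// Illustrative example (not part of the generated code): for a `movl reg, [base + disp32]`
// encoding such as 8B 83 78 56 34 12, the label is bound right after the instruction, so the
// 32-bit placeholder starts at label.Position() - 4, which is exactly the adjustment defined
// above.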
5621
5622 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5623 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5624 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5625 ArenaVector<linker::LinkerPatch>* linker_patches) {
5626 for (const X86PcRelativePatchInfo& info : infos) {
5627 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5628 linker_patches->push_back(Factory(literal_offset,
5629 info.target_dex_file,
5630 GetMethodAddressOffset(info.method_address),
5631 info.offset_or_index));
5632 }
5633 }
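// Each recorded patch becomes a LinkerPatch built from three pieces: the literal offset of the
// imm32 placeholder to rewrite, the offset of the associated HX86ComputeBaseMethodAddress anchor
// (so the linker can form a PC-relative value), and the dex file / index or boot image offset
// identifying the target.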
5634
5635 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5636 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5637 const DexFile* target_dex_file,
5638 uint32_t pc_insn_offset,
5639 uint32_t boot_image_offset) {
5640 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5641 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5642 }
5643
5644 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5645 DCHECK(linker_patches->empty());
5646 size_t size =
5647 boot_image_method_patches_.size() +
5648 method_bss_entry_patches_.size() +
5649 boot_image_type_patches_.size() +
5650 type_bss_entry_patches_.size() +
5651 public_type_bss_entry_patches_.size() +
5652 package_type_bss_entry_patches_.size() +
5653 boot_image_string_patches_.size() +
5654 string_bss_entry_patches_.size() +
5655 boot_image_jni_entrypoint_patches_.size() +
5656 boot_image_other_patches_.size();
5657 linker_patches->reserve(size);
5658 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5659 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5660 boot_image_method_patches_, linker_patches);
5661 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5662 boot_image_type_patches_, linker_patches);
5663 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5664 boot_image_string_patches_, linker_patches);
5665 } else {
5666 DCHECK(boot_image_method_patches_.empty());
5667 DCHECK(boot_image_type_patches_.empty());
5668 DCHECK(boot_image_string_patches_.empty());
5669 }
5670 if (GetCompilerOptions().IsBootImage()) {
5671 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5672 boot_image_other_patches_, linker_patches);
5673 } else {
5674 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5675 boot_image_other_patches_, linker_patches);
5676 }
5677 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5678 method_bss_entry_patches_, linker_patches);
5679 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5680 type_bss_entry_patches_, linker_patches);
5681 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5682 public_type_bss_entry_patches_, linker_patches);
5683 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5684 package_type_bss_entry_patches_, linker_patches);
5685 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5686 string_bss_entry_patches_, linker_patches);
5687 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5688 boot_image_jni_entrypoint_patches_, linker_patches);
5689 DCHECK_EQ(size, linker_patches->size());
5690 }
5691
5692 void CodeGeneratorX86::MarkGCCard(Register temp,
5693 Register card,
5694 Register object,
5695 Register value,
5696 bool value_can_be_null) {
5697 NearLabel is_null;
5698 if (value_can_be_null) {
5699 __ testl(value, value);
5700 __ j(kEqual, &is_null);
5701 }
5702 // Load the address of the card table into `card`.
5703 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5704 // Calculate the offset (in the card table) of the card corresponding to
5705 // `object`.
5706 __ movl(temp, object);
5707 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5708 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5709 // `object`'s card.
5710 //
5711 // Register `card` contains the address of the card table. Note that the card
5712 // table's base is biased during its creation so that it always starts at an
5713 // address whose least-significant byte is equal to `kCardDirty` (see
5714 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5715 // below writes the `kCardDirty` (byte) value into the `object`'s card
5716 // (located at `card + object >> kCardShift`).
5717 //
5718 // This dual use of the value in register `card` (1. to calculate the location
5719 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5720 // (no need to explicitly load `kCardDirty` as an immediate value).
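  // Net effect, written out as a sketch (the actual store is the movb below):
  //   card_table_base[object >> kCardShift] = kCardDirty;
  // where `card` holds card_table_base and its least-significant byte equals kCardDirty.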
5721 __ movb(Address(temp, card, TIMES_1, 0),
5722 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5723 if (value_can_be_null) {
5724 __ Bind(&is_null);
5725 }
5726 }
5727
5728 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5729 DCHECK(instruction->IsInstanceFieldGet() ||
5730 instruction->IsStaticFieldGet() ||
5731 instruction->IsPredicatedInstanceFieldGet());
5732
5733 bool object_field_get_with_read_barrier =
5734 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5735 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5736 LocationSummary* locations =
5737 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5738 kEmitCompilerReadBarrier
5739 ? LocationSummary::kCallOnSlowPath
5740 : LocationSummary::kNoCall);
5741 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5742 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5743 }
5744 // receiver_input
5745 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5746 if (is_predicated) {
5747 if (DataType::IsFloatingPointType(instruction->GetType())) {
5748 locations->SetInAt(0, Location::RequiresFpuRegister());
5749 } else {
5750 locations->SetInAt(0, Location::RequiresRegister());
5751 }
5752 }
5753 if (DataType::IsFloatingPointType(instruction->GetType())) {
5754 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5755 : Location::RequiresFpuRegister());
5756 } else {
5757 // The output overlaps in case of long: we don't want the low move
5758 // to overwrite the object's location. Likewise, in the case of
5759 // an object field get with read barriers enabled, we do not want
5760 // the move to overwrite the object's location, as we need it to emit
5761 // the read barrier.
5762 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5763 (object_field_get_with_read_barrier ||
5764 instruction->GetType() == DataType::Type::kInt64 ||
5765 is_predicated)
5766 ? Location::kOutputOverlap
5767 : Location::kNoOutputOverlap);
5768 }
5769
5770 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5771 // Long values can be loaded atomically into an XMM register using movsd.
5772 // So we use an XMM register as a temp to achieve atomicity (first load the
5773 // value into the XMM temp and then copy the XMM into the output register
5774 // pair, 32 bits at a time).
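    // Hedged sketch of the kind of sequence ultimately emitted by LoadFromMemoryNoBarrier for
    // this case (an assumption about that helper's internals, shown only to illustrate the
    // comment above):
    //   movsd xmm_temp, [base + offset]   // one 64-bit load => atomic
    //   movd  out_lo, xmm_temp
    //   psrlq xmm_temp, 32
    //   movd  out_hi, xmm_temp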
5775 locations->AddTemp(Location::RequiresFpuRegister());
5776 }
5777 }
5778
5779 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5780 const FieldInfo& field_info) {
5781 DCHECK(instruction->IsInstanceFieldGet() ||
5782 instruction->IsStaticFieldGet() ||
5783 instruction->IsPredicatedInstanceFieldGet());
5784
5785 LocationSummary* locations = instruction->GetLocations();
5786 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5787 Register base = base_loc.AsRegister<Register>();
5788 Location out = locations->Out();
5789 bool is_volatile = field_info.IsVolatile();
5790 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5791 DataType::Type load_type = instruction->GetType();
5792 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5793
5794 if (load_type == DataType::Type::kReference) {
5795 // /* HeapReference<Object> */ out = *(base + offset)
5796 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5797 // Note that a potential implicit null check is handled in this
5798 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5799 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5800 instruction, out, base, offset, /* needs_null_check= */ true);
5801 if (is_volatile) {
5802 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5803 }
5804 } else {
5805 __ movl(out.AsRegister<Register>(), Address(base, offset));
5806 codegen_->MaybeRecordImplicitNullCheck(instruction);
5807 if (is_volatile) {
5808 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5809 }
5810 // If read barriers are enabled, emit read barriers other than
5811 // Baker's using a slow path (and also unpoison the loaded
5812 // reference, if heap poisoning is enabled).
5813 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5814 }
5815 } else {
5816 Address src(base, offset);
5817 XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
5818 ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
5819 : kNoXmmRegister;
5820 codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
5821 if (is_volatile) {
5822 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5823 }
5824 }
5825 }
5826
5827 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5828 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5829
5830 LocationSummary* locations =
5831 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5832 locations->SetInAt(0, Location::RequiresRegister());
5833 bool is_volatile = field_info.IsVolatile();
5834 DataType::Type field_type = field_info.GetFieldType();
5835 bool is_byte_type = DataType::Size(field_type) == 1u;
5836
5837 // The register allocator does not support multiple
5838 // inputs that die at entry with one in a specific register.
5839 if (is_byte_type) {
5840 // Ensure the value is in a byte register.
5841 locations->SetInAt(1, Location::RegisterLocation(EAX));
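    // (Explanatory note: on 32-bit x86 only EAX, EBX, ECX and EDX have byte-addressable forms,
    // so a fixed byte-capable register is required here; the specific choice of EAX is presumably
    // arbitrary among those four.)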
5842 } else if (DataType::IsFloatingPointType(field_type)) {
5843 if (is_volatile && field_type == DataType::Type::kFloat64) {
5844 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5845 locations->SetInAt(1, Location::RequiresFpuRegister());
5846 } else {
5847 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5848 }
5849 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5850 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5851 locations->SetInAt(1, Location::RequiresRegister());
5852
5853 // A 64-bit value can be atomically written to an address with movsd and an XMM register.
5854 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5855 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5856 // NB: We could make the register allocator understand fp_reg <-> core_reg moves, but given the
5857 // isolated cases where we need this, it isn't worth adding the extra complexity.
5858 locations->AddTemp(Location::RequiresFpuRegister());
5859 locations->AddTemp(Location::RequiresFpuRegister());
5860 } else {
5861 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5862
5863 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5864 // Temporary registers for the write barrier.
5865 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
5866 // Ensure the card is in a byte register.
5867 locations->AddTemp(Location::RegisterLocation(ECX));
5868 }
5869 }
5870 }
5871
5872 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5873 uint32_t value_index,
5874 DataType::Type field_type,
5875 Address field_addr,
5876 Register base,
5877 bool is_volatile,
5878 bool value_can_be_null) {
5879 LocationSummary* locations = instruction->GetLocations();
5880 Location value = locations->InAt(value_index);
5881 bool needs_write_barrier =
5882 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index));
5883
5884 if (is_volatile) {
5885 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5886 }
5887
5888 bool maybe_record_implicit_null_check_done = false;
5889
5890 switch (field_type) {
5891 case DataType::Type::kBool:
5892 case DataType::Type::kUint8:
5893 case DataType::Type::kInt8: {
5894 if (value.IsConstant()) {
5895 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5896 } else {
5897 __ movb(field_addr, value.AsRegister<ByteRegister>());
5898 }
5899 break;
5900 }
5901
5902 case DataType::Type::kUint16:
5903 case DataType::Type::kInt16: {
5904 if (value.IsConstant()) {
5905 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5906 } else {
5907 __ movw(field_addr, value.AsRegister<Register>());
5908 }
5909 break;
5910 }
5911
5912 case DataType::Type::kInt32:
5913 case DataType::Type::kReference: {
5914 if (kPoisonHeapReferences && needs_write_barrier) {
5915 // Note that in the case where `value` is a null reference,
5916 // we do not enter this block, as the reference does not
5917 // need poisoning.
5918 DCHECK_EQ(field_type, DataType::Type::kReference);
5919 Register temp = locations->GetTemp(0).AsRegister<Register>();
5920 __ movl(temp, value.AsRegister<Register>());
5921 __ PoisonHeapReference(temp);
5922 __ movl(field_addr, temp);
5923 } else if (value.IsConstant()) {
5924 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5925 __ movl(field_addr, Immediate(v));
5926 } else {
5927 DCHECK(value.IsRegister()) << value;
5928 __ movl(field_addr, value.AsRegister<Register>());
5929 }
5930 break;
5931 }
5932
5933 case DataType::Type::kInt64: {
5934 if (is_volatile) {
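        // Assemble the 64-bit value in an XMM register so that a single movsd can store it
        // atomically: temp1 = {lo}, temp2 = {hi}, and punpckldq interleaves the low dwords to
        // produce temp1 = {lo, hi}.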
5935 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5936 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5937 __ movd(temp1, value.AsRegisterPairLow<Register>());
5938 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5939 __ punpckldq(temp1, temp2);
5940 __ movsd(field_addr, temp1);
5941 codegen_->MaybeRecordImplicitNullCheck(instruction);
5942 } else if (value.IsConstant()) {
5943 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5944 __ movl(field_addr, Immediate(Low32Bits(v)));
5945 codegen_->MaybeRecordImplicitNullCheck(instruction);
5946 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
5947 } else {
5948 __ movl(field_addr, value.AsRegisterPairLow<Register>());
5949 codegen_->MaybeRecordImplicitNullCheck(instruction);
5950 __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
5951 }
5952 maybe_record_implicit_null_check_done = true;
5953 break;
5954 }
5955
5956 case DataType::Type::kFloat32: {
5957 if (value.IsConstant()) {
5958 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5959 __ movl(field_addr, Immediate(v));
5960 } else {
5961 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5962 }
5963 break;
5964 }
5965
5966 case DataType::Type::kFloat64: {
5967 if (value.IsConstant()) {
5968 DCHECK(!is_volatile);
5969 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5970 __ movl(field_addr, Immediate(Low32Bits(v)));
5971 codegen_->MaybeRecordImplicitNullCheck(instruction);
5972 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
5973 maybe_record_implicit_null_check_done = true;
5974 } else {
5975 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5976 }
5977 break;
5978 }
5979
5980 case DataType::Type::kUint32:
5981 case DataType::Type::kUint64:
5982 case DataType::Type::kVoid:
5983 LOG(FATAL) << "Unreachable type " << field_type;
5984 UNREACHABLE();
5985 }
5986
5987 if (!maybe_record_implicit_null_check_done) {
5988 codegen_->MaybeRecordImplicitNullCheck(instruction);
5989 }
5990
5991 if (needs_write_barrier) {
5992 Register temp = locations->GetTemp(0).AsRegister<Register>();
5993 Register card = locations->GetTemp(1).AsRegister<Register>();
5994 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5995 }
5996
5997 if (is_volatile) {
5998 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5999 }
6000 }
6001
6002 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6003 const FieldInfo& field_info,
6004 bool value_can_be_null) {
6005 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6006
6007 LocationSummary* locations = instruction->GetLocations();
6008 Register base = locations->InAt(0).AsRegister<Register>();
6009 bool is_volatile = field_info.IsVolatile();
6010 DataType::Type field_type = field_info.GetFieldType();
6011 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6012 bool is_predicated =
6013 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
6014
6015 Address field_addr(base, offset);
6016
6017 NearLabel pred_is_null;
6018 if (is_predicated) {
6019 __ testl(base, base);
6020 __ j(kEqual, &pred_is_null);
6021 }
6022
6023 HandleFieldSet(instruction,
6024 /* value_index= */ 1,
6025 field_type,
6026 field_addr,
6027 base,
6028 is_volatile,
6029 value_can_be_null);
6030
6031 if (is_predicated) {
6032 __ Bind(&pred_is_null);
6033 }
6034 }
6035
6036 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6037 HandleFieldGet(instruction, instruction->GetFieldInfo());
6038 }
6039
6040 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6041 HandleFieldGet(instruction, instruction->GetFieldInfo());
6042 }
6043
6044 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6045 HandleFieldSet(instruction, instruction->GetFieldInfo());
6046 }
6047
6048 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6049 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6050 }
6051
6052 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6053 HandleFieldSet(instruction, instruction->GetFieldInfo());
6054 }
6055
6056 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6057 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6058 }
6059
6060 void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
6061 HPredicatedInstanceFieldGet* instruction) {
6062 HandleFieldGet(instruction, instruction->GetFieldInfo());
6063 }
6064
6065 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6066 HandleFieldGet(instruction, instruction->GetFieldInfo());
6067 }
6068
6069 void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
6070 HPredicatedInstanceFieldGet* instruction) {
6071 NearLabel finish;
6072 LocationSummary* locations = instruction->GetLocations();
6073 Register recv = locations->InAt(1).AsRegister<Register>();
6074 __ testl(recv, recv);
6075 __ j(kZero, &finish);
6076 HandleFieldGet(instruction, instruction->GetFieldInfo());
6077 __ Bind(&finish);
6078 }
6079 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6080 HandleFieldGet(instruction, instruction->GetFieldInfo());
6081 }
6082
6083 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6084 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6085 }
6086
6087 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6088 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6089 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6090 }
6091
6092 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6093 HUnresolvedInstanceFieldGet* instruction) {
6094 FieldAccessCallingConventionX86 calling_convention;
6095 codegen_->CreateUnresolvedFieldLocationSummary(
6096 instruction, instruction->GetFieldType(), calling_convention);
6097 }
6098
6099 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6100 HUnresolvedInstanceFieldGet* instruction) {
6101 FieldAccessCallingConventionX86 calling_convention;
6102 codegen_->GenerateUnresolvedFieldAccess(instruction,
6103 instruction->GetFieldType(),
6104 instruction->GetFieldIndex(),
6105 instruction->GetDexPc(),
6106 calling_convention);
6107 }
6108
6109 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6110 HUnresolvedInstanceFieldSet* instruction) {
6111 FieldAccessCallingConventionX86 calling_convention;
6112 codegen_->CreateUnresolvedFieldLocationSummary(
6113 instruction, instruction->GetFieldType(), calling_convention);
6114 }
6115
6116 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6117 HUnresolvedInstanceFieldSet* instruction) {
6118 FieldAccessCallingConventionX86 calling_convention;
6119 codegen_->GenerateUnresolvedFieldAccess(instruction,
6120 instruction->GetFieldType(),
6121 instruction->GetFieldIndex(),
6122 instruction->GetDexPc(),
6123 calling_convention);
6124 }
6125
6126 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6127 HUnresolvedStaticFieldGet* instruction) {
6128 FieldAccessCallingConventionX86 calling_convention;
6129 codegen_->CreateUnresolvedFieldLocationSummary(
6130 instruction, instruction->GetFieldType(), calling_convention);
6131 }
6132
6133 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6134 HUnresolvedStaticFieldGet* instruction) {
6135 FieldAccessCallingConventionX86 calling_convention;
6136 codegen_->GenerateUnresolvedFieldAccess(instruction,
6137 instruction->GetFieldType(),
6138 instruction->GetFieldIndex(),
6139 instruction->GetDexPc(),
6140 calling_convention);
6141 }
6142
6143 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6144 HUnresolvedStaticFieldSet* instruction) {
6145 FieldAccessCallingConventionX86 calling_convention;
6146 codegen_->CreateUnresolvedFieldLocationSummary(
6147 instruction, instruction->GetFieldType(), calling_convention);
6148 }
6149
6150 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6151 HUnresolvedStaticFieldSet* instruction) {
6152 FieldAccessCallingConventionX86 calling_convention;
6153 codegen_->GenerateUnresolvedFieldAccess(instruction,
6154 instruction->GetFieldType(),
6155 instruction->GetFieldIndex(),
6156 instruction->GetDexPc(),
6157 calling_convention);
6158 }
6159
6160 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6161 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6162 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6163 ? Location::RequiresRegister()
6164 : Location::Any();
6165 locations->SetInAt(0, loc);
6166 }
6167
6168 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6169 if (CanMoveNullCheckToUser(instruction)) {
6170 return;
6171 }
6172 LocationSummary* locations = instruction->GetLocations();
6173 Location obj = locations->InAt(0);
6174
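  // Explanatory note: loading from offset 0 of a null object faults, and the runtime's fault
  // handler converts that fault into a NullPointerException using the PC recorded below. testl
  // is used because it reads memory without overwriting any general-purpose register.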
6175 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6176 RecordPcInfo(instruction, instruction->GetDexPc());
6177 }
6178
6179 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6180 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6181 AddSlowPath(slow_path);
6182
6183 LocationSummary* locations = instruction->GetLocations();
6184 Location obj = locations->InAt(0);
6185
6186 if (obj.IsRegister()) {
6187 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6188 } else if (obj.IsStackSlot()) {
6189 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6190 } else {
6191 DCHECK(obj.IsConstant()) << obj;
6192 DCHECK(obj.GetConstant()->IsNullConstant());
6193 __ jmp(slow_path->GetEntryLabel());
6194 return;
6195 }
6196 __ j(kEqual, slow_path->GetEntryLabel());
6197 }
6198
6199 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6200 codegen_->GenerateNullCheck(instruction);
6201 }
6202
6203 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6204 bool object_array_get_with_read_barrier =
6205 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6206 LocationSummary* locations =
6207 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6208 object_array_get_with_read_barrier
6209 ? LocationSummary::kCallOnSlowPath
6210 : LocationSummary::kNoCall);
6211 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6212 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6213 }
6214 locations->SetInAt(0, Location::RequiresRegister());
6215 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6216 if (DataType::IsFloatingPointType(instruction->GetType())) {
6217 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6218 } else {
6219 // The output overlaps in case of long: we don't want the low move
6220 // to overwrite the array's location. Likewise, in the case of an
6221 // object array get with read barriers enabled, we do not want the
6222 // move to overwrite the array's location, as we need it to emit
6223 // the read barrier.
6224 locations->SetOut(
6225 Location::RequiresRegister(),
6226 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6227 ? Location::kOutputOverlap
6228 : Location::kNoOutputOverlap);
6229 }
6230 }
6231
6232 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6233 LocationSummary* locations = instruction->GetLocations();
6234 Location obj_loc = locations->InAt(0);
6235 Register obj = obj_loc.AsRegister<Register>();
6236 Location index = locations->InAt(1);
6237 Location out_loc = locations->Out();
6238 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6239
6240 DataType::Type type = instruction->GetType();
6241 if (type == DataType::Type::kReference) {
6242 static_assert(
6243 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6244 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6245 // /* HeapReference<Object> */ out =
6246 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6247 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6248 // Note that a potential implicit null check is handled in this
6249 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6250 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6251 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6252 } else {
6253 Register out = out_loc.AsRegister<Register>();
6254 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6255 codegen_->MaybeRecordImplicitNullCheck(instruction);
6256 // If read barriers are enabled, emit read barriers other than
6257 // Baker's using a slow path (and also unpoison the loaded
6258 // reference, if heap poisoning is enabled).
6259 if (index.IsConstant()) {
6260 uint32_t offset =
6261 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6262 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6263 } else {
6264 codegen_->MaybeGenerateReadBarrierSlow(
6265 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6266 }
6267 }
6268 } else if (type == DataType::Type::kUint16
6269 && mirror::kUseStringCompression
6270 && instruction->IsStringCharAt()) {
6271 // Branch into the compressed and uncompressed cases depending on the string's compression flag.
6272 Register out = out_loc.AsRegister<Register>();
6273 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
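    // Explanatory note: with string compression the count field holds (length << 1) | flag, with
    // the flag in bit 0 (0 = compressed 8-bit chars, 1 = uncompressed 16-bit chars); the testb
    // below and the shrl in VisitArrayLength both rely on this layout.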
6274 NearLabel done, not_compressed;
6275 __ testb(Address(obj, count_offset), Immediate(1));
6276 codegen_->MaybeRecordImplicitNullCheck(instruction);
6277 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6278 "Expecting 0=compressed, 1=uncompressed");
6279 __ j(kNotZero, &not_compressed);
6280 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6281 __ jmp(&done);
6282 __ Bind(&not_compressed);
6283 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6284 __ Bind(&done);
6285 } else {
6286 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6287 Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6288 codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6289 }
6290 }
6291
6292 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6293 DataType::Type value_type = instruction->GetComponentType();
6294
6295 bool needs_write_barrier =
6296 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6297 bool needs_type_check = instruction->NeedsTypeCheck();
6298
6299 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6300 instruction,
6301 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6302
6303 bool is_byte_type = DataType::Size(value_type) == 1u;
6304 // We need the inputs to be different from the output in the case of a long operation.
6305 // In the case of a byte operation, the register allocator does not support multiple
6306 // inputs that die at entry with one in a specific register.
6307 locations->SetInAt(0, Location::RequiresRegister());
6308 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6309 if (is_byte_type) {
6310 // Ensure the value is in a byte register.
6311 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6312 } else if (DataType::IsFloatingPointType(value_type)) {
6313 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6314 } else {
6315 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6316 }
6317 if (needs_write_barrier) {
6318 // Temporary registers for the write barrier.
6319 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
6320 // Ensure the card is in a byte register.
6321 locations->AddTemp(Location::RegisterLocation(ECX));
6322 }
6323 }
6324
6325 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6326 LocationSummary* locations = instruction->GetLocations();
6327 Location array_loc = locations->InAt(0);
6328 Register array = array_loc.AsRegister<Register>();
6329 Location index = locations->InAt(1);
6330 Location value = locations->InAt(2);
6331 DataType::Type value_type = instruction->GetComponentType();
6332 bool needs_type_check = instruction->NeedsTypeCheck();
6333 bool needs_write_barrier =
6334 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6335
6336 switch (value_type) {
6337 case DataType::Type::kBool:
6338 case DataType::Type::kUint8:
6339 case DataType::Type::kInt8: {
6340 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6341 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6342 if (value.IsRegister()) {
6343 __ movb(address, value.AsRegister<ByteRegister>());
6344 } else {
6345 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6346 }
6347 codegen_->MaybeRecordImplicitNullCheck(instruction);
6348 break;
6349 }
6350
6351 case DataType::Type::kUint16:
6352 case DataType::Type::kInt16: {
6353 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6354 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6355 if (value.IsRegister()) {
6356 __ movw(address, value.AsRegister<Register>());
6357 } else {
6358 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6359 }
6360 codegen_->MaybeRecordImplicitNullCheck(instruction);
6361 break;
6362 }
6363
6364 case DataType::Type::kReference: {
6365 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6366 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6367
6368 if (!value.IsRegister()) {
6369 // Just setting null.
6370 DCHECK(instruction->InputAt(2)->IsNullConstant());
6371 DCHECK(value.IsConstant()) << value;
6372 __ movl(address, Immediate(0));
6373 codegen_->MaybeRecordImplicitNullCheck(instruction);
6374 DCHECK(!needs_write_barrier);
6375 DCHECK(!needs_type_check);
6376 break;
6377 }
6378
6379 DCHECK(needs_write_barrier);
6380 Register register_value = value.AsRegister<Register>();
6381 Location temp_loc = locations->GetTemp(0);
6382 Register temp = temp_loc.AsRegister<Register>();
6383
6384 bool can_value_be_null = instruction->GetValueCanBeNull();
6385 NearLabel do_store;
6386 if (can_value_be_null) {
6387 __ testl(register_value, register_value);
6388 __ j(kEqual, &do_store);
6389 }
6390
6391 SlowPathCode* slow_path = nullptr;
6392 if (needs_type_check) {
6393 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6394 codegen_->AddSlowPath(slow_path);
6395
6396 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6397 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6398 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6399
6400 // Note that when Baker read barriers are enabled, the type
6401 // checks are performed without read barriers. This is fine,
6402 // even in the case where a class object is in the from-space
6403 // after the flip, as a comparison involving such a type would
6404 // not produce a false positive; it may of course produce a
6405 // false negative, in which case we would take the ArraySet
6406 // slow path.
6407
6408 // /* HeapReference<Class> */ temp = array->klass_
6409 __ movl(temp, Address(array, class_offset));
6410 codegen_->MaybeRecordImplicitNullCheck(instruction);
6411 __ MaybeUnpoisonHeapReference(temp);
6412
6413 // /* HeapReference<Class> */ temp = temp->component_type_
6414 __ movl(temp, Address(temp, component_offset));
6415 // If heap poisoning is enabled, no need to unpoison `temp`
6416 // nor the object reference in `register_value->klass`, as
6417 // we are comparing two poisoned references.
6418 __ cmpl(temp, Address(register_value, class_offset));
6419
6420 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6421 NearLabel do_put;
6422 __ j(kEqual, &do_put);
6423 // If heap poisoning is enabled, the `temp` reference has
6424 // not been unpoisoned yet; unpoison it now.
6425 __ MaybeUnpoisonHeapReference(temp);
6426
6427 // If heap poisoning is enabled, no need to unpoison the
6428 // heap reference loaded below, as it is only used for a
6429 // comparison with null.
6430 __ cmpl(Address(temp, super_offset), Immediate(0));
6431 __ j(kNotEqual, slow_path->GetEntryLabel());
6432 __ Bind(&do_put);
6433 } else {
6434 __ j(kNotEqual, slow_path->GetEntryLabel());
6435 }
6436 }
6437
6438 Register card = locations->GetTemp(1).AsRegister<Register>();
6439 codegen_->MarkGCCard(
6440 temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6441
6442 if (can_value_be_null) {
6443 DCHECK(do_store.IsLinked());
6444 __ Bind(&do_store);
6445 }
6446
6447 Register source = register_value;
6448 if (kPoisonHeapReferences) {
6449 __ movl(temp, register_value);
6450 __ PoisonHeapReference(temp);
6451 source = temp;
6452 }
6453
6454 __ movl(address, source);
6455
6456 if (can_value_be_null || !needs_type_check) {
6457 codegen_->MaybeRecordImplicitNullCheck(instruction);
6458 }
6459
6460 if (slow_path != nullptr) {
6461 __ Bind(slow_path->GetExitLabel());
6462 }
6463
6464 break;
6465 }
6466
6467 case DataType::Type::kInt32: {
6468 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6469 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6470 if (value.IsRegister()) {
6471 __ movl(address, value.AsRegister<Register>());
6472 } else {
6473 DCHECK(value.IsConstant()) << value;
6474 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6475 __ movl(address, Immediate(v));
6476 }
6477 codegen_->MaybeRecordImplicitNullCheck(instruction);
6478 break;
6479 }
6480
6481 case DataType::Type::kInt64: {
6482 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6483 if (value.IsRegisterPair()) {
6484 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6485 value.AsRegisterPairLow<Register>());
6486 codegen_->MaybeRecordImplicitNullCheck(instruction);
6487 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6488 value.AsRegisterPairHigh<Register>());
6489 } else {
6490 DCHECK(value.IsConstant());
6491 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6492 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6493 Immediate(Low32Bits(val)));
6494 codegen_->MaybeRecordImplicitNullCheck(instruction);
6495 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6496 Immediate(High32Bits(val)));
6497 }
6498 break;
6499 }
6500
6501 case DataType::Type::kFloat32: {
6502 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6503 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6504 if (value.IsFpuRegister()) {
6505 __ movss(address, value.AsFpuRegister<XmmRegister>());
6506 } else {
6507 DCHECK(value.IsConstant());
6508 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6509 __ movl(address, Immediate(v));
6510 }
6511 codegen_->MaybeRecordImplicitNullCheck(instruction);
6512 break;
6513 }
6514
6515 case DataType::Type::kFloat64: {
6516 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6517 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6518 if (value.IsFpuRegister()) {
6519 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6520 } else {
6521 DCHECK(value.IsConstant());
6522 Address address_hi =
6523 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6524 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6525 __ movl(address, Immediate(Low32Bits(v)));
6526 codegen_->MaybeRecordImplicitNullCheck(instruction);
6527 __ movl(address_hi, Immediate(High32Bits(v)));
6528 }
6529 break;
6530 }
6531
6532 case DataType::Type::kUint32:
6533 case DataType::Type::kUint64:
6534 case DataType::Type::kVoid:
6535 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6536 UNREACHABLE();
6537 }
6538 }
6539
6540 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6541 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6542 locations->SetInAt(0, Location::RequiresRegister());
6543 if (!instruction->IsEmittedAtUseSite()) {
6544 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6545 }
6546 }
6547
6548 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6549 if (instruction->IsEmittedAtUseSite()) {
6550 return;
6551 }
6552
6553 LocationSummary* locations = instruction->GetLocations();
6554 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6555 Register obj = locations->InAt(0).AsRegister<Register>();
6556 Register out = locations->Out().AsRegister<Register>();
6557 __ movl(out, Address(obj, offset));
6558 codegen_->MaybeRecordImplicitNullCheck(instruction);
6559 // Shift out the compression flag bit in case the array is a String's char array.
6560 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6561 __ shrl(out, Immediate(1));
6562 }
6563 }
6564
6565 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6566 RegisterSet caller_saves = RegisterSet::Empty();
6567 InvokeRuntimeCallingConvention calling_convention;
6568 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6569 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6570 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6571 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6572 HInstruction* length = instruction->InputAt(1);
6573 if (!length->IsEmittedAtUseSite()) {
6574 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6575 }
6576 // Need a temporary register to hold the array's length.
6577 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6578 locations->AddTemp(Location::RequiresRegister());
6579 }
6580 }
6581
6582 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6583 const bool is_string_compressed_char_at =
6584 mirror::kUseStringCompression && instruction->IsStringCharAt();
6585 LocationSummary* locations = instruction->GetLocations();
6586 Location index_loc = locations->InAt(0);
6587 Location length_loc = locations->InAt(1);
6588 SlowPathCode* slow_path =
6589 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6590
6591 if (length_loc.IsConstant()) {
6592 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6593 if (index_loc.IsConstant()) {
6594 // BCE will remove the bounds check if we are guaranteed to pass.
6595 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6596 if (index < 0 || index >= length) {
6597 codegen_->AddSlowPath(slow_path);
6598 __ jmp(slow_path->GetEntryLabel());
6599 } else {
6600 // Some optimization after BCE may have generated this, and we should not
6601 // generate a bounds check if it is a valid range.
6602 }
6603 return;
6604 }
6605
6606 // We have to reverse the jump condition because the length is the constant.
6607 Register index_reg = index_loc.AsRegister<Register>();
6608 __ cmpl(index_reg, Immediate(length));
6609 codegen_->AddSlowPath(slow_path);
6610 __ j(kAboveEqual, slow_path->GetEntryLabel());
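    // Note that kAboveEqual (and kBelowEqual below) are unsigned comparisons, so a negative
    // index is treated as a huge unsigned value and also falls into the slow path; no separate
    // "index < 0" check is needed.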
6611 } else {
6612 HInstruction* array_length = instruction->InputAt(1);
6613 if (array_length->IsEmittedAtUseSite()) {
6614 // Address the length field in the array.
6615 DCHECK(array_length->IsArrayLength());
6616 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6617 Location array_loc = array_length->GetLocations()->InAt(0);
6618 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6619 if (is_string_compressed_char_at) {
6620 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6621 // the string compression flag) with the in-memory length and avoid the temporary.
6622 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6623 __ movl(length_reg, array_len);
6624 codegen_->MaybeRecordImplicitNullCheck(array_length);
6625 __ shrl(length_reg, Immediate(1));
6626 codegen_->GenerateIntCompare(length_reg, index_loc);
6627 } else {
6628 // Checking bounds for the general case:
6629 // an array of chars, or a String's char array with string compression off.
6630 if (index_loc.IsConstant()) {
6631 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6632 __ cmpl(array_len, Immediate(value));
6633 } else {
6634 __ cmpl(array_len, index_loc.AsRegister<Register>());
6635 }
6636 codegen_->MaybeRecordImplicitNullCheck(array_length);
6637 }
6638 } else {
6639 codegen_->GenerateIntCompare(length_loc, index_loc);
6640 }
6641 codegen_->AddSlowPath(slow_path);
6642 __ j(kBelowEqual, slow_path->GetEntryLabel());
6643 }
6644 }
6645
6646 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6647 LOG(FATAL) << "Unreachable";
6648 }
6649
6650 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6651 if (instruction->GetNext()->IsSuspendCheck() &&
6652 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6653 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6654 // The back edge will generate the suspend check.
6655 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6656 }
6657
6658 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6659 }
6660
6661 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6662 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6663 instruction, LocationSummary::kCallOnSlowPath);
6664 // In the suspend check slow path there are usually no caller-save registers at all.
6665 // If SIMD instructions are present, however, we force spilling all live SIMD
6666 // registers in full width (since the runtime only saves/restores the lower part).
6667 locations->SetCustomSlowPathCallerSaves(
6668 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6669 }
6670
6671 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6672 HBasicBlock* block = instruction->GetBlock();
6673 if (block->GetLoopInformation() != nullptr) {
6674 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6675 // The back edge will generate the suspend check.
6676 return;
6677 }
6678 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6679 // The goto will generate the suspend check.
6680 return;
6681 }
6682 GenerateSuspendCheck(instruction, nullptr);
6683 }
6684
6685 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6686 HBasicBlock* successor) {
6687 SuspendCheckSlowPathX86* slow_path =
6688 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6689 if (slow_path == nullptr) {
6690 slow_path =
6691 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6692 instruction->SetSlowPath(slow_path);
6693 codegen_->AddSlowPath(slow_path);
6694 if (successor != nullptr) {
6695 DCHECK(successor->IsLoopHeader());
6696 }
6697 } else {
6698 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6699 }
6700
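  // Explanatory note: on x86 the fs segment register points at the current Thread, so this tests
  // the thread's state-and-flags word; any pending suspend or checkpoint request bit sends us to
  // the slow path.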
6701 __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6702 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6703 if (successor == nullptr) {
6704 __ j(kNotZero, slow_path->GetEntryLabel());
6705 __ Bind(slow_path->GetReturnLabel());
6706 } else {
6707 __ j(kZero, codegen_->GetLabelOf(successor));
6708 __ jmp(slow_path->GetEntryLabel());
6709 }
6710 }
6711
6712 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6713 return codegen_->GetAssembler();
6714 }
6715
6716 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6717 ScratchRegisterScope ensure_scratch(
6718 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6719 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6720 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6721
6722 // Now that a temp register is available (possibly after spilling), move the blocks of memory.
6723 for (int i = 0; i < number_of_words; i++) {
6724 __ movl(temp_reg, Address(ESP, src + stack_offset));
6725 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6726 stack_offset += kX86WordSize;
6727 }
6728 }
6729
6730 void ParallelMoveResolverX86::EmitMove(size_t index) {
6731 MoveOperands* move = moves_[index];
6732 Location source = move->GetSource();
6733 Location destination = move->GetDestination();
6734
6735 if (source.IsRegister()) {
6736 if (destination.IsRegister()) {
6737 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6738 } else if (destination.IsFpuRegister()) {
6739 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6740 } else {
6741 DCHECK(destination.IsStackSlot());
6742 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6743 }
6744 } else if (source.IsRegisterPair()) {
6745 if (destination.IsRegisterPair()) {
6746 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6747 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6748 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6749 } else if (destination.IsFpuRegister()) {
6750 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6751 // Push the 2 source registers to the stack.
6752 __ pushl(source.AsRegisterPairHigh<Register>());
6753 __ cfi().AdjustCFAOffset(elem_size);
6754 __ pushl(source.AsRegisterPairLow<Register>());
6755 __ cfi().AdjustCFAOffset(elem_size);
6756 // Load the destination register.
6757 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6758 // And remove the temporary stack space we allocated.
6759 codegen_->DecreaseFrame(2 * elem_size);
6760 } else {
6761 DCHECK(destination.IsDoubleStackSlot());
6762 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6763 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6764 source.AsRegisterPairHigh<Register>());
6765 }
6766 } else if (source.IsFpuRegister()) {
6767 if (destination.IsRegister()) {
6768 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6769 } else if (destination.IsFpuRegister()) {
6770 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6771 } else if (destination.IsRegisterPair()) {
6772 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6773 // Create stack space for 2 elements.
6774 codegen_->IncreaseFrame(2 * elem_size);
6775 // Store the source register.
6776 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6777 // And pop the values into destination registers.
6778 __ popl(destination.AsRegisterPairLow<Register>());
6779 __ cfi().AdjustCFAOffset(-elem_size);
6780 __ popl(destination.AsRegisterPairHigh<Register>());
6781 __ cfi().AdjustCFAOffset(-elem_size);
6782 } else if (destination.IsStackSlot()) {
6783 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6784 } else if (destination.IsDoubleStackSlot()) {
6785 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6786 } else {
6787 DCHECK(destination.IsSIMDStackSlot());
6788 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6789 }
6790 } else if (source.IsStackSlot()) {
6791 if (destination.IsRegister()) {
6792 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6793 } else if (destination.IsFpuRegister()) {
6794 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6795 } else {
6796 DCHECK(destination.IsStackSlot());
6797 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6798 }
6799 } else if (source.IsDoubleStackSlot()) {
6800 if (destination.IsRegisterPair()) {
6801 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6802 __ movl(destination.AsRegisterPairHigh<Register>(),
6803 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6804 } else if (destination.IsFpuRegister()) {
6805 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6806 } else {
6807 DCHECK(destination.IsDoubleStackSlot()) << destination;
6808 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6809 }
6810 } else if (source.IsSIMDStackSlot()) {
6811 if (destination.IsFpuRegister()) {
6812 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6813 } else {
6814 DCHECK(destination.IsSIMDStackSlot());
6815 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6816 }
6817 } else if (source.IsConstant()) {
6818 HConstant* constant = source.GetConstant();
6819 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6820 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6821 if (destination.IsRegister()) {
6822 if (value == 0) {
6823 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6824 } else {
6825 __ movl(destination.AsRegister<Register>(), Immediate(value));
6826 }
6827 } else {
6828 DCHECK(destination.IsStackSlot()) << destination;
6829 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6830 }
6831 } else if (constant->IsFloatConstant()) {
6832 float fp_value = constant->AsFloatConstant()->GetValue();
6833 int32_t value = bit_cast<int32_t, float>(fp_value);
6834 Immediate imm(value);
6835 if (destination.IsFpuRegister()) {
6836 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6837 if (value == 0) {
6838 // Easy handling of 0.0.
6839 __ xorps(dest, dest);
6840 } else {
6841 ScratchRegisterScope ensure_scratch(
6842 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6843 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6844 __ movl(temp, Immediate(value));
6845 __ movd(dest, temp);
6846 }
6847 } else {
6848 DCHECK(destination.IsStackSlot()) << destination;
6849 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6850 }
6851 } else if (constant->IsLongConstant()) {
6852 int64_t value = constant->AsLongConstant()->GetValue();
6853 int32_t low_value = Low32Bits(value);
6854 int32_t high_value = High32Bits(value);
6855 Immediate low(low_value);
6856 Immediate high(high_value);
6857 if (destination.IsDoubleStackSlot()) {
6858 __ movl(Address(ESP, destination.GetStackIndex()), low);
6859 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6860 } else {
6861 __ movl(destination.AsRegisterPairLow<Register>(), low);
6862 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6863 }
6864 } else {
6865 DCHECK(constant->IsDoubleConstant());
6866 double dbl_value = constant->AsDoubleConstant()->GetValue();
6867 int64_t value = bit_cast<int64_t, double>(dbl_value);
6868 int32_t low_value = Low32Bits(value);
6869 int32_t high_value = High32Bits(value);
6870 Immediate low(low_value);
6871 Immediate high(high_value);
6872 if (destination.IsFpuRegister()) {
6873 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6874 if (value == 0) {
6875 // Easy handling of 0.0.
6876 __ xorpd(dest, dest);
6877 } else {
6878 __ pushl(high);
6879 __ cfi().AdjustCFAOffset(4);
6880 __ pushl(low);
6881 __ cfi().AdjustCFAOffset(4);
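        // Pushing the high word and then the low word leaves the low word at (ESP) and the high
        // word at (ESP + 4), i.e. the 64-bit double laid out in little-endian order for movsd.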
6882 __ movsd(dest, Address(ESP, 0));
6883 codegen_->DecreaseFrame(8);
6884 }
6885 } else {
6886 DCHECK(destination.IsDoubleStackSlot()) << destination;
6887 __ movl(Address(ESP, destination.GetStackIndex()), low);
6888 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6889 }
6890 }
6891 } else {
6892 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6893 }
6894 }
6895
6896 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6897 Register suggested_scratch = reg == EAX ? EBX : EAX;
6898 ScratchRegisterScope ensure_scratch(
6899 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6900
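  // If the scratch register had to be spilled, its push moved ESP down by one word, so the
  // incoming ESP-relative offset must be biased by kX86WordSize.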
6901 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6902 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6903 __ movl(Address(ESP, mem + stack_offset), reg);
6904 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6905 }
6906
6907 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6908 ScratchRegisterScope ensure_scratch(
6909 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6910
6911 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6912 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6913 __ movl(temp_reg, Address(ESP, mem + stack_offset));
6914 __ movss(Address(ESP, mem + stack_offset), reg);
6915 __ movd(reg, temp_reg);
6916 }
6917
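// Exchange a 128-bit XMM register with a 16-byte stack slot: spill the register into freshly
// reserved stack space, swap the two memory regions word by word, then reload the register and
// release the space.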
6918 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6919 size_t extra_slot = 4 * kX86WordSize;
6920 codegen_->IncreaseFrame(extra_slot);
6921 __ movups(Address(ESP, 0), XmmRegister(reg));
6922 ExchangeMemory(0, mem + extra_slot, 4);
6923 __ movups(XmmRegister(reg), Address(ESP, 0));
6924 codegen_->DecreaseFrame(extra_slot);
6925 }
6926
6927 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6928 ScratchRegisterScope ensure_scratch1(
6929 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6930
6931 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6932 ScratchRegisterScope ensure_scratch2(
6933 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6934
6935 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6936 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6937
6938 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6939 for (int i = 0; i < number_of_words; i++) {
6940 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6941 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6942 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6943 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6944 stack_offset += kX86WordSize;
6945 }
6946 }
6947
6948 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6949 MoveOperands* move = moves_[index];
6950 Location source = move->GetSource();
6951 Location destination = move->GetDestination();
6952
6953 if (source.IsRegister() && destination.IsRegister()) {
6954 // Use the XOR swap algorithm to avoid the serializing XCHG instruction or the use of a temporary.
6955 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6956 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6957 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6958 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
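    // With a' = a ^ b, the second xorl yields b' = a' ^ b = a and the third yields
    // a'' = a' ^ b' = b, so the two registers end up exchanged without a temporary.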
6959 } else if (source.IsRegister() && destination.IsStackSlot()) {
6960 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6961 } else if (source.IsStackSlot() && destination.IsRegister()) {
6962 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6963 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6964 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6965 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6966 // Use the XOR swap algorithm to avoid a temporary register.
6967 DCHECK_NE(source.reg(), destination.reg());
6968 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6969 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6970 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6971 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6972 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6973 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6974 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6975 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6976 // Take advantage of the 16 bytes in the XMM register.
6977 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6978 Address stack(ESP, destination.GetStackIndex());
6979 // Load the double into the high doubleword.
6980 __ movhpd(reg, stack);
6981
6982 // Store the low double into the destination.
6983 __ movsd(stack, reg);
6984
6985 // Move the high double to the low double.
6986 __ psrldq(reg, Immediate(8));
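    // Net effect: the stack slot's double is now in the low 64 bits of `reg` and the slot holds
    // reg's original low double, so the swap completes without a scratch register.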
6987 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6988 // Take advantage of the 16 bytes in the XMM register.
6989 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6990 Address stack(ESP, source.GetStackIndex());
6991 // Load the double into the high doubleword.
6992 __ movhpd(reg, stack);
6993
6994 // Store the low double into the destination.
6995 __ movsd(stack, reg);
6996
6997 // Move the high double to the low double.
6998 __ psrldq(reg, Immediate(8));
6999 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7000 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7001 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7002 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7003 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7004 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7005 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7006 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7007 } else {
7008 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7009 }
7010 }
7011
7012 void ParallelMoveResolverX86::SpillScratch(int reg) {
7013 __ pushl(static_cast<Register>(reg));
7014 }
7015
7016 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7017 __ popl(static_cast<Register>(reg));
7018 }
7019
7020 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7021 HLoadClass::LoadKind desired_class_load_kind) {
7022 switch (desired_class_load_kind) {
7023 case HLoadClass::LoadKind::kInvalid:
7024 LOG(FATAL) << "UNREACHABLE";
7025 UNREACHABLE();
7026 case HLoadClass::LoadKind::kReferrersClass:
7027 break;
7028 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7029 case HLoadClass::LoadKind::kBootImageRelRo:
7030 case HLoadClass::LoadKind::kBssEntry:
7031 case HLoadClass::LoadKind::kBssEntryPublic:
7032 case HLoadClass::LoadKind::kBssEntryPackage:
7033 DCHECK(!GetCompilerOptions().IsJitCompiler());
7034 break;
7035 case HLoadClass::LoadKind::kJitBootImageAddress:
7036 case HLoadClass::LoadKind::kJitTableAddress:
7037 DCHECK(GetCompilerOptions().IsJitCompiler());
7038 break;
7039 case HLoadClass::LoadKind::kRuntimeCall:
7040 break;
7041 }
7042 return desired_class_load_kind;
7043 }
7044
7045 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7046 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7047 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7048 InvokeRuntimeCallingConvention calling_convention;
7049 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7050 cls,
7051 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7052 Location::RegisterLocation(EAX));
7053 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7054 return;
7055 }
7056 DCHECK_EQ(cls->NeedsAccessCheck(),
7057 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7058 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7059
7060 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
7061 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7062 ? LocationSummary::kCallOnSlowPath
7063 : LocationSummary::kNoCall;
7064 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7065 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7066 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7067 }
7068
7069 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7070 locations->SetInAt(0, Location::RequiresRegister());
7071 }
7072 locations->SetOut(Location::RequiresRegister());
7073 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7074 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7075 // Rely on the type resolution and/or initialization to save everything.
7076 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7077 } else {
7078 // For non-Baker read barrier we have a temp-clobbering call.
7079 }
7080 }
7081 }
7082
7083 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7084 dex::TypeIndex type_index,
7085 Handle<mirror::Class> handle) {
7086 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7087 // Add a patch entry and return the label.
7088 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7089 PatchInfo<Label>* info = &jit_class_patches_.back();
7090 return &info->label;
7091 }
7092
7093 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7094 // move.
7095 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7096 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7097 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7098 codegen_->GenerateLoadClassRuntimeCall(cls);
7099 return;
7100 }
7101 DCHECK_EQ(cls->NeedsAccessCheck(),
7102 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7103 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7104
7105 LocationSummary* locations = cls->GetLocations();
7106 Location out_loc = locations->Out();
7107 Register out = out_loc.AsRegister<Register>();
7108
7109 bool generate_null_check = false;
7110 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7111 ? kWithoutReadBarrier
7112 : kCompilerReadBarrierOption;
7113 switch (load_kind) {
7114 case HLoadClass::LoadKind::kReferrersClass: {
7115 DCHECK(!cls->CanCallRuntime());
7116 DCHECK(!cls->MustGenerateClinitCheck());
7117 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7118 Register current_method = locations->InAt(0).AsRegister<Register>();
7119 GenerateGcRootFieldLoad(
7120 cls,
7121 out_loc,
7122 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7123 /* fixup_label= */ nullptr,
7124 read_barrier_option);
7125 break;
7126 }
7127 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7128 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7129 codegen_->GetCompilerOptions().IsBootImageExtension());
7130 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7131 Register method_address = locations->InAt(0).AsRegister<Register>();
7132 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7133 codegen_->RecordBootImageTypePatch(cls);
7134 break;
7135 }
7136 case HLoadClass::LoadKind::kBootImageRelRo: {
7137 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7138 Register method_address = locations->InAt(0).AsRegister<Register>();
7139 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7140 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7141 CodeGenerator::GetBootImageOffset(cls));
7142 break;
7143 }
7144 case HLoadClass::LoadKind::kBssEntry:
7145 case HLoadClass::LoadKind::kBssEntryPublic:
7146 case HLoadClass::LoadKind::kBssEntryPackage: {
7147 Register method_address = locations->InAt(0).AsRegister<Register>();
7148 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7149 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7150 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7151 // No need for memory fence, thanks to the x86 memory model.
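      // The .bss slot starts out null; if the load yields null, the slow path added below calls
      // into the runtime, which resolves the class and fills in the slot for future executions.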
7152 generate_null_check = true;
7153 break;
7154 }
7155 case HLoadClass::LoadKind::kJitBootImageAddress: {
7156 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7157 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7158 DCHECK_NE(address, 0u);
7159 __ movl(out, Immediate(address));
7160 break;
7161 }
7162 case HLoadClass::LoadKind::kJitTableAddress: {
7163 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7164 Label* fixup_label = codegen_->NewJitRootClassPatch(
7165 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7166 // /* GcRoot<mirror::Class> */ out = *address
7167 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7168 break;
7169 }
7170 case HLoadClass::LoadKind::kRuntimeCall:
7171 case HLoadClass::LoadKind::kInvalid:
7172 LOG(FATAL) << "UNREACHABLE";
7173 UNREACHABLE();
7174 }
7175
7176 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7177 DCHECK(cls->CanCallRuntime());
7178 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7179 codegen_->AddSlowPath(slow_path);
7180
7181 if (generate_null_check) {
7182 __ testl(out, out);
7183 __ j(kEqual, slow_path->GetEntryLabel());
7184 }
7185
7186 if (cls->MustGenerateClinitCheck()) {
7187 GenerateClassInitializationCheck(slow_path, out);
7188 } else {
7189 __ Bind(slow_path->GetExitLabel());
7190 }
7191 }
7192 }
7193
7194 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7195 InvokeRuntimeCallingConvention calling_convention;
7196 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7197 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7198 }
7199
7200 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7201 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7202 }
7203
7204 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7205 InvokeRuntimeCallingConvention calling_convention;
7206 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7207 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7208 }
7209
7210 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7211 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7212 }
7213
7214 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7215 LocationSummary* locations =
7216 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7217 locations->SetInAt(0, Location::RequiresRegister());
7218 if (check->HasUses()) {
7219 locations->SetOut(Location::SameAsFirstInput());
7220 }
7221 // Rely on the type initialization to save everything we need.
7222 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7223 }
7224
7225 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7226 // We assume the class is not null.
7227 SlowPathCode* slow_path =
7228 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7229 codegen_->AddSlowPath(slow_path);
7230 GenerateClassInitializationCheck(slow_path,
7231 check->GetLocations()->InAt(0).AsRegister<Register>());
7232 }
7233
7234 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7235 SlowPathCode* slow_path, Register class_reg) {
7236 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
7237 const size_t status_byte_offset =
7238 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
7239 constexpr uint32_t shifted_visibly_initialized_value =
7240 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
7241
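  // A single unsigned byte comparison suffices: any status ordered below kVisiblyInitialized
  // must take the slow path, which is what the kBelow branch below checks.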
7242 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
7243 __ j(kBelow, slow_path->GetEntryLabel());
7244 __ Bind(slow_path->GetExitLabel());
7245 }
7246
7247 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7248 Register temp) {
7249 uint32_t path_to_root = check->GetBitstringPathToRoot();
7250 uint32_t mask = check->GetBitstringMask();
7251 DCHECK(IsPowerOfTwo(mask + 1));
7252 size_t mask_bits = WhichPowerOf2(mask + 1);
7253
7254 if (mask_bits == 16u) {
7255 // Compare the bitstring in memory.
7256 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7257 } else {
7258 // /* uint32_t */ temp = temp->status_
7259 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7260 // Compare the bitstring bits using SUB.
7261 __ subl(temp, Immediate(path_to_root));
7262 // Shift out bits that do not contribute to the comparison.
7263 __ shll(temp, Immediate(32u - mask_bits));
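    // After the subtraction the low mask_bits bits are zero exactly when the object's bitstring
    // equals path_to_root; the left shift discards the unrelated high bits, so the caller's
    // flags-based equality check sees zero iff the type check succeeds.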
7264 }
7265 }
7266
7267 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7268 HLoadString::LoadKind desired_string_load_kind) {
7269 switch (desired_string_load_kind) {
7270 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7271 case HLoadString::LoadKind::kBootImageRelRo:
7272 case HLoadString::LoadKind::kBssEntry:
7273 DCHECK(!GetCompilerOptions().IsJitCompiler());
7274 break;
7275 case HLoadString::LoadKind::kJitBootImageAddress:
7276 case HLoadString::LoadKind::kJitTableAddress:
7277 DCHECK(GetCompilerOptions().IsJitCompiler());
7278 break;
7279 case HLoadString::LoadKind::kRuntimeCall:
7280 break;
7281 }
7282 return desired_string_load_kind;
7283 }
7284
7285 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7286 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7287 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7288 HLoadString::LoadKind load_kind = load->GetLoadKind();
7289 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7290 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7291 load_kind == HLoadString::LoadKind::kBssEntry) {
7292 locations->SetInAt(0, Location::RequiresRegister());
7293 }
7294 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7295 locations->SetOut(Location::RegisterLocation(EAX));
7296 } else {
7297 locations->SetOut(Location::RequiresRegister());
7298 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7299 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7300 // Rely on the pResolveString to save everything.
7301 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7302 } else {
7303 // For non-Baker read barrier we have a temp-clobbering call.
7304 }
7305 }
7306 }
7307 }
7308
7309 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7310 dex::StringIndex string_index,
7311 Handle<mirror::String> handle) {
7312 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7313 // Add a patch entry and return the label.
7314 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7315 PatchInfo<Label>* info = &jit_string_patches_.back();
7316 return &info->label;
7317 }
7318
7319 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7320 // move.
7321 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7322 LocationSummary* locations = load->GetLocations();
7323 Location out_loc = locations->Out();
7324 Register out = out_loc.AsRegister<Register>();
7325
7326 switch (load->GetLoadKind()) {
7327 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7328 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7329 codegen_->GetCompilerOptions().IsBootImageExtension());
7330 Register method_address = locations->InAt(0).AsRegister<Register>();
7331 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7332 codegen_->RecordBootImageStringPatch(load);
7333 return;
7334 }
7335 case HLoadString::LoadKind::kBootImageRelRo: {
7336 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7337 Register method_address = locations->InAt(0).AsRegister<Register>();
7338 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7339 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7340 CodeGenerator::GetBootImageOffset(load));
7341 return;
7342 }
7343 case HLoadString::LoadKind::kBssEntry: {
7344 Register method_address = locations->InAt(0).AsRegister<Register>();
7345 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7346 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7347 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7348 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7349 // No need for memory fence, thanks to the x86 memory model.
7350 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7351 codegen_->AddSlowPath(slow_path);
7352 __ testl(out, out);
7353 __ j(kEqual, slow_path->GetEntryLabel());
7354 __ Bind(slow_path->GetExitLabel());
7355 return;
7356 }
7357 case HLoadString::LoadKind::kJitBootImageAddress: {
7358 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7359 DCHECK_NE(address, 0u);
7360 __ movl(out, Immediate(address));
7361 return;
7362 }
7363 case HLoadString::LoadKind::kJitTableAddress: {
7364 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7365 Label* fixup_label = codegen_->NewJitRootStringPatch(
7366 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7367 // /* GcRoot<mirror::String> */ out = *address
7368 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7369 return;
7370 }
7371 default:
7372 break;
7373 }
7374
7375 // TODO: Re-add the compiler code to do string dex cache lookup again.
7376 InvokeRuntimeCallingConvention calling_convention;
7377 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7378 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7379 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7380 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7381 }
7382
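// On x86 the current Thread is reachable through the fs segment, so the pending exception field
// can be accessed with a single fs-relative absolute address.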
7383 static Address GetExceptionTlsAddress() {
7384 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7385 }
7386
7387 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7388 LocationSummary* locations =
7389 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7390 locations->SetOut(Location::RequiresRegister());
7391 }
7392
7393 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7394 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7395 }
7396
7397 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7398 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7399 }
7400
7401 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7402 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7403 }
7404
7405 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7406 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7407 instruction, LocationSummary::kCallOnMainOnly);
7408 InvokeRuntimeCallingConvention calling_convention;
7409 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7410 }
7411
7412 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7413 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7414 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7415 }
7416
7417 // A temp is used for the read barrier.
7418 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7419 if (kEmitCompilerReadBarrier &&
7420 !kUseBakerReadBarrier &&
7421 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7422 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7423 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7424 return 1;
7425 }
7426 return 0;
7427 }
7428
7429 // The interface case has two temps: one holds the number of interfaces and one holds the
7430 // current interface pointer; the current interface is compared in memory.
7431 // The other checks have one temp for loading the object's class.
7432 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7433 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7434 return 2;
7435 }
7436 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7437 }
7438
7439 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7440 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7441 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7442 bool baker_read_barrier_slow_path = false;
7443 switch (type_check_kind) {
7444 case TypeCheckKind::kExactCheck:
7445 case TypeCheckKind::kAbstractClassCheck:
7446 case TypeCheckKind::kClassHierarchyCheck:
7447 case TypeCheckKind::kArrayObjectCheck: {
7448 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7449 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7450 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7451 break;
7452 }
7453 case TypeCheckKind::kArrayCheck:
7454 case TypeCheckKind::kUnresolvedCheck:
7455 case TypeCheckKind::kInterfaceCheck:
7456 call_kind = LocationSummary::kCallOnSlowPath;
7457 break;
7458 case TypeCheckKind::kBitstringCheck:
7459 break;
7460 }
7461
7462 LocationSummary* locations =
7463 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7464 if (baker_read_barrier_slow_path) {
7465 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7466 }
7467 locations->SetInAt(0, Location::RequiresRegister());
7468 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7469 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7470 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7471 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7472 } else {
7473 locations->SetInAt(1, Location::Any());
7474 }
7475 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7476 locations->SetOut(Location::RequiresRegister());
7477 // When read barriers are enabled, we need a temporary register for some cases.
7478 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7479 }
7480
7481 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7482 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7483 LocationSummary* locations = instruction->GetLocations();
7484 Location obj_loc = locations->InAt(0);
7485 Register obj = obj_loc.AsRegister<Register>();
7486 Location cls = locations->InAt(1);
7487 Location out_loc = locations->Out();
7488 Register out = out_loc.AsRegister<Register>();
7489 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7490 DCHECK_LE(num_temps, 1u);
7491 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7492 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7493 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7494 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7495 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7496 SlowPathCode* slow_path = nullptr;
7497 NearLabel done, zero;
7498
7499 // Return 0 if `obj` is null.
7500 // Avoid null check if we know obj is not null.
7501 if (instruction->MustDoNullCheck()) {
7502 __ testl(obj, obj);
7503 __ j(kEqual, &zero);
7504 }
7505
7506 switch (type_check_kind) {
7507 case TypeCheckKind::kExactCheck: {
7508 ReadBarrierOption read_barrier_option =
7509 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7510 // /* HeapReference<Class> */ out = obj->klass_
7511 GenerateReferenceLoadTwoRegisters(instruction,
7512 out_loc,
7513 obj_loc,
7514 class_offset,
7515 read_barrier_option);
7516 if (cls.IsRegister()) {
7517 __ cmpl(out, cls.AsRegister<Register>());
7518 } else {
7519 DCHECK(cls.IsStackSlot()) << cls;
7520 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7521 }
7522
7523 // Classes must be equal for the instanceof to succeed.
7524 __ j(kNotEqual, &zero);
7525 __ movl(out, Immediate(1));
7526 __ jmp(&done);
7527 break;
7528 }
7529
7530 case TypeCheckKind::kAbstractClassCheck: {
7531 ReadBarrierOption read_barrier_option =
7532 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7533 // /* HeapReference<Class> */ out = obj->klass_
7534 GenerateReferenceLoadTwoRegisters(instruction,
7535 out_loc,
7536 obj_loc,
7537 class_offset,
7538 read_barrier_option);
7539 // If the class is abstract, we eagerly fetch the super class of the
7540 // object to avoid doing a comparison we know will fail.
7541 NearLabel loop;
7542 __ Bind(&loop);
7543 // /* HeapReference<Class> */ out = out->super_class_
7544 GenerateReferenceLoadOneRegister(instruction,
7545 out_loc,
7546 super_offset,
7547 maybe_temp_loc,
7548 read_barrier_option);
7549 __ testl(out, out);
7550 // If `out` is null, we use it for the result, and jump to `done`.
7551 __ j(kEqual, &done);
7552 if (cls.IsRegister()) {
7553 __ cmpl(out, cls.AsRegister<Register>());
7554 } else {
7555 DCHECK(cls.IsStackSlot()) << cls;
7556 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7557 }
7558 __ j(kNotEqual, &loop);
7559 __ movl(out, Immediate(1));
7560 if (zero.IsLinked()) {
7561 __ jmp(&done);
7562 }
7563 break;
7564 }
7565
7566 case TypeCheckKind::kClassHierarchyCheck: {
7567 ReadBarrierOption read_barrier_option =
7568 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7569 // /* HeapReference<Class> */ out = obj->klass_
7570 GenerateReferenceLoadTwoRegisters(instruction,
7571 out_loc,
7572 obj_loc,
7573 class_offset,
7574 read_barrier_option);
7575 // Walk over the class hierarchy to find a match.
7576 NearLabel loop, success;
7577 __ Bind(&loop);
7578 if (cls.IsRegister()) {
7579 __ cmpl(out, cls.AsRegister<Register>());
7580 } else {
7581 DCHECK(cls.IsStackSlot()) << cls;
7582 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7583 }
7584 __ j(kEqual, &success);
7585 // /* HeapReference<Class> */ out = out->super_class_
7586 GenerateReferenceLoadOneRegister(instruction,
7587 out_loc,
7588 super_offset,
7589 maybe_temp_loc,
7590 read_barrier_option);
7591 __ testl(out, out);
7592 __ j(kNotEqual, &loop);
7593 // If `out` is null, we use it for the result, and jump to `done`.
7594 __ jmp(&done);
7595 __ Bind(&success);
7596 __ movl(out, Immediate(1));
7597 if (zero.IsLinked()) {
7598 __ jmp(&done);
7599 }
7600 break;
7601 }
7602
7603 case TypeCheckKind::kArrayObjectCheck: {
7604 ReadBarrierOption read_barrier_option =
7605 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7606 // /* HeapReference<Class> */ out = obj->klass_
7607 GenerateReferenceLoadTwoRegisters(instruction,
7608 out_loc,
7609 obj_loc,
7610 class_offset,
7611 read_barrier_option);
7612 // Do an exact check.
7613 NearLabel exact_check;
7614 if (cls.IsRegister()) {
7615 __ cmpl(out, cls.AsRegister<Register>());
7616 } else {
7617 DCHECK(cls.IsStackSlot()) << cls;
7618 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7619 }
7620 __ j(kEqual, &exact_check);
7621 // Otherwise, we need to check that the object's class is a non-primitive array.
7622 // /* HeapReference<Class> */ out = out->component_type_
7623 GenerateReferenceLoadOneRegister(instruction,
7624 out_loc,
7625 component_offset,
7626 maybe_temp_loc,
7627 read_barrier_option);
7628 __ testl(out, out);
7629 // If `out` is null, we use it for the result, and jump to `done`.
7630 __ j(kEqual, &done);
7631 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7632 __ j(kNotEqual, &zero);
7633 __ Bind(&exact_check);
7634 __ movl(out, Immediate(1));
7635 __ jmp(&done);
7636 break;
7637 }
7638
7639 case TypeCheckKind::kArrayCheck: {
7640 // No read barrier since the slow path will retry upon failure.
7641 // /* HeapReference<Class> */ out = obj->klass_
7642 GenerateReferenceLoadTwoRegisters(instruction,
7643 out_loc,
7644 obj_loc,
7645 class_offset,
7646 kWithoutReadBarrier);
7647 if (cls.IsRegister()) {
7648 __ cmpl(out, cls.AsRegister<Register>());
7649 } else {
7650 DCHECK(cls.IsStackSlot()) << cls;
7651 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7652 }
7653 DCHECK(locations->OnlyCallsOnSlowPath());
7654 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7655 instruction, /* is_fatal= */ false);
7656 codegen_->AddSlowPath(slow_path);
7657 __ j(kNotEqual, slow_path->GetEntryLabel());
7658 __ movl(out, Immediate(1));
7659 if (zero.IsLinked()) {
7660 __ jmp(&done);
7661 }
7662 break;
7663 }
7664
7665 case TypeCheckKind::kUnresolvedCheck:
7666 case TypeCheckKind::kInterfaceCheck: {
7667 // Note that we indeed only call on slow path, but we always go
7668 // into the slow path for the unresolved and interface check
7669 // cases.
7670 //
7671 // We cannot directly call the InstanceofNonTrivial runtime
7672 // entry point without resorting to a type checking slow path
7673 // here (i.e. by calling InvokeRuntime directly), as it would
7674 // require assigning fixed registers for the inputs of this
7675 // HInstanceOf instruction (following the runtime calling
7676 // convention), which might be cluttered by the potential first
7677 // read barrier emission at the beginning of this method.
7678 //
7679 // TODO: Introduce a new runtime entry point taking the object
7680 // to test (instead of its class) as argument, and let it deal
7681 // with the read barrier issues. This will let us refactor this
7682 // case of the `switch` code as it was previously (with a direct
7683 // call to the runtime not using a type checking slow path).
7684 // This should also be beneficial for the other cases above.
7685 DCHECK(locations->OnlyCallsOnSlowPath());
7686 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7687 instruction, /* is_fatal= */ false);
7688 codegen_->AddSlowPath(slow_path);
7689 __ jmp(slow_path->GetEntryLabel());
7690 if (zero.IsLinked()) {
7691 __ jmp(&done);
7692 }
7693 break;
7694 }
7695
7696 case TypeCheckKind::kBitstringCheck: {
7697 // /* HeapReference<Class> */ temp = obj->klass_
7698 GenerateReferenceLoadTwoRegisters(instruction,
7699 out_loc,
7700 obj_loc,
7701 class_offset,
7702 kWithoutReadBarrier);
7703
7704 GenerateBitstringTypeCheckCompare(instruction, out);
7705 __ j(kNotEqual, &zero);
7706 __ movl(out, Immediate(1));
7707 __ jmp(&done);
7708 break;
7709 }
7710 }
7711
7712 if (zero.IsLinked()) {
7713 __ Bind(&zero);
7714 __ xorl(out, out);
7715 }
7716
7717 if (done.IsLinked()) {
7718 __ Bind(&done);
7719 }
7720
7721 if (slow_path != nullptr) {
7722 __ Bind(slow_path->GetExitLabel());
7723 }
7724 }
7725
7726 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7727 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7728 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7729 LocationSummary* locations =
7730 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7731 locations->SetInAt(0, Location::RequiresRegister());
7732 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7733 // Require a register for the interface check since there is a loop that compares the class to
7734 // a memory address.
7735 locations->SetInAt(1, Location::RequiresRegister());
7736 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7737 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7738 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7739 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7740 } else {
7741 locations->SetInAt(1, Location::Any());
7742 }
7743 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7744 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7745 }
7746
7747 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7748 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7749 LocationSummary* locations = instruction->GetLocations();
7750 Location obj_loc = locations->InAt(0);
7751 Register obj = obj_loc.AsRegister<Register>();
7752 Location cls = locations->InAt(1);
7753 Location temp_loc = locations->GetTemp(0);
7754 Register temp = temp_loc.AsRegister<Register>();
7755 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7756 DCHECK_GE(num_temps, 1u);
7757 DCHECK_LE(num_temps, 2u);
7758 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7759 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7760 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7761 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7762 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7763 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7764 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7765 const uint32_t object_array_data_offset =
7766 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7767
7768 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7769 SlowPathCode* type_check_slow_path =
7770 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7771 instruction, is_type_check_slow_path_fatal);
7772 codegen_->AddSlowPath(type_check_slow_path);
7773
7774 NearLabel done;
7775 // Avoid null check if we know obj is not null.
7776 if (instruction->MustDoNullCheck()) {
7777 __ testl(obj, obj);
7778 __ j(kEqual, &done);
7779 }
7780
7781 switch (type_check_kind) {
7782 case TypeCheckKind::kExactCheck:
7783 case TypeCheckKind::kArrayCheck: {
7784 // /* HeapReference<Class> */ temp = obj->klass_
7785 GenerateReferenceLoadTwoRegisters(instruction,
7786 temp_loc,
7787 obj_loc,
7788 class_offset,
7789 kWithoutReadBarrier);
7790
7791 if (cls.IsRegister()) {
7792 __ cmpl(temp, cls.AsRegister<Register>());
7793 } else {
7794 DCHECK(cls.IsStackSlot()) << cls;
7795 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7796 }
7797 // Jump to slow path for throwing the exception or doing a
7798 // more involved array check.
7799 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7800 break;
7801 }
7802
7803 case TypeCheckKind::kAbstractClassCheck: {
7804 // /* HeapReference<Class> */ temp = obj->klass_
7805 GenerateReferenceLoadTwoRegisters(instruction,
7806 temp_loc,
7807 obj_loc,
7808 class_offset,
7809 kWithoutReadBarrier);
7810
7811 // If the class is abstract, we eagerly fetch the super class of the
7812 // object to avoid doing a comparison we know will fail.
7813 NearLabel loop;
7814 __ Bind(&loop);
7815 // /* HeapReference<Class> */ temp = temp->super_class_
7816 GenerateReferenceLoadOneRegister(instruction,
7817 temp_loc,
7818 super_offset,
7819 maybe_temp2_loc,
7820 kWithoutReadBarrier);
7821
7822 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7823 // exception.
7824 __ testl(temp, temp);
7825 __ j(kZero, type_check_slow_path->GetEntryLabel());
7826
7827 // Otherwise, compare the classes.
7828 if (cls.IsRegister()) {
7829 __ cmpl(temp, cls.AsRegister<Register>());
7830 } else {
7831 DCHECK(cls.IsStackSlot()) << cls;
7832 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7833 }
7834 __ j(kNotEqual, &loop);
7835 break;
7836 }
7837
7838 case TypeCheckKind::kClassHierarchyCheck: {
7839 // /* HeapReference<Class> */ temp = obj->klass_
7840 GenerateReferenceLoadTwoRegisters(instruction,
7841 temp_loc,
7842 obj_loc,
7843 class_offset,
7844 kWithoutReadBarrier);
7845
7846 // Walk over the class hierarchy to find a match.
7847 NearLabel loop;
7848 __ Bind(&loop);
7849 if (cls.IsRegister()) {
7850 __ cmpl(temp, cls.AsRegister<Register>());
7851 } else {
7852 DCHECK(cls.IsStackSlot()) << cls;
7853 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7854 }
7855 __ j(kEqual, &done);
7856
7857 // /* HeapReference<Class> */ temp = temp->super_class_
7858 GenerateReferenceLoadOneRegister(instruction,
7859 temp_loc,
7860 super_offset,
7861 maybe_temp2_loc,
7862 kWithoutReadBarrier);
7863
7864 // If the class reference currently in `temp` is not null, jump
7865 // back to the beginning of the loop.
7866 __ testl(temp, temp);
7867 __ j(kNotZero, &loop);
7868 // Otherwise, jump to the slow path to throw the exception.
7869 __ jmp(type_check_slow_path->GetEntryLabel());
7870 break;
7871 }
7872
7873 case TypeCheckKind::kArrayObjectCheck: {
7874 // /* HeapReference<Class> */ temp = obj->klass_
7875 GenerateReferenceLoadTwoRegisters(instruction,
7876 temp_loc,
7877 obj_loc,
7878 class_offset,
7879 kWithoutReadBarrier);
7880
7881 // Do an exact check.
7882 if (cls.IsRegister()) {
7883 __ cmpl(temp, cls.AsRegister<Register>());
7884 } else {
7885 DCHECK(cls.IsStackSlot()) << cls;
7886 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7887 }
7888 __ j(kEqual, &done);
7889
7890 // Otherwise, we need to check that the object's class is a non-primitive array.
7891 // /* HeapReference<Class> */ temp = temp->component_type_
7892 GenerateReferenceLoadOneRegister(instruction,
7893 temp_loc,
7894 component_offset,
7895 maybe_temp2_loc,
7896 kWithoutReadBarrier);
7897
7898 // If the component type is null (i.e. the object is not an array), jump to the slow path to
7899 // throw the exception. Otherwise proceed with the check.
7900 __ testl(temp, temp);
7901 __ j(kZero, type_check_slow_path->GetEntryLabel());
7902
7903 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7904 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7905 break;
7906 }
7907
7908 case TypeCheckKind::kUnresolvedCheck:
7909 // We always go into the type check slow path for the unresolved check case.
7910 // We cannot directly call the CheckCast runtime entry point
7911 // without resorting to a type checking slow path here (i.e. by
7912 // calling InvokeRuntime directly), as it would require
7913 // assigning fixed registers for the inputs of this HCheckCast
7914 // instruction (following the runtime calling convention), which
7915 // might be cluttered by the potential first read barrier
7916 // emission at the beginning of this method.
7917 __ jmp(type_check_slow_path->GetEntryLabel());
7918 break;
7919
7920 case TypeCheckKind::kInterfaceCheck: {
7921 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7922 // We cannot get false positives by doing this.
7923 // /* HeapReference<Class> */ temp = obj->klass_
7924 GenerateReferenceLoadTwoRegisters(instruction,
7925 temp_loc,
7926 obj_loc,
7927 class_offset,
7928 kWithoutReadBarrier);
7929
7930 // /* HeapReference<Class> */ temp = temp->iftable_
7931 GenerateReferenceLoadTwoRegisters(instruction,
7932 temp_loc,
7933 temp_loc,
7934 iftable_offset,
7935 kWithoutReadBarrier);
7936 // Iftable is never null.
7937 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7938 // Maybe poison the `cls` for direct comparison with memory.
7939 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7940 // Loop through the iftable and check if any class matches.
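      // Each iftable entry holds two references (the interface class followed by its method
      // array), so the counter is decremented in steps of two and indexes the interface class
      // slot directly.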
7941 NearLabel start_loop;
7942 __ Bind(&start_loop);
7943 // Need to subtract first to handle the empty array case.
7944 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7945 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7946 // Go to next interface if the classes do not match.
7947 __ cmpl(cls.AsRegister<Register>(),
7948 CodeGeneratorX86::ArrayAddress(temp,
7949 maybe_temp2_loc,
7950 TIMES_4,
7951 object_array_data_offset));
7952 __ j(kNotEqual, &start_loop);
7953 // If `cls` was poisoned above, unpoison it.
7954 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
7955 break;
7956 }
7957
7958 case TypeCheckKind::kBitstringCheck: {
7959 // /* HeapReference<Class> */ temp = obj->klass_
7960 GenerateReferenceLoadTwoRegisters(instruction,
7961 temp_loc,
7962 obj_loc,
7963 class_offset,
7964 kWithoutReadBarrier);
7965
7966 GenerateBitstringTypeCheckCompare(instruction, temp);
7967 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7968 break;
7969 }
7970 }
7971 __ Bind(&done);
7972
7973 __ Bind(type_check_slow_path->GetExitLabel());
7974 }
7975
7976 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7977 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7978 instruction, LocationSummary::kCallOnMainOnly);
7979 InvokeRuntimeCallingConvention calling_convention;
7980 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7981 }
7982
7983 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7984 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7985 : kQuickUnlockObject,
7986 instruction,
7987 instruction->GetDexPc());
7988 if (instruction->IsEnter()) {
7989 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7990 } else {
7991 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7992 }
7993 }
7994
7995 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7996 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7997 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7998 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7999 locations->SetInAt(0, Location::RequiresRegister());
8000 locations->SetInAt(1, Location::RequiresRegister());
8001 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8002 }
8003
8004 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8005 LocationSummary* locations = instruction->GetLocations();
8006 Location first = locations->InAt(0);
8007 Location second = locations->InAt(1);
8008 Location dest = locations->Out();
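  // The BMI andn instruction computes the bitwise AND of one operand with the complement of the
  // other in a single instruction; for 64-bit values the low and high halves are processed
  // independently since the operation is purely bitwise.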
8009 if (instruction->GetResultType() == DataType::Type::kInt32) {
8010 __ andn(dest.AsRegister<Register>(),
8011 first.AsRegister<Register>(),
8012 second.AsRegister<Register>());
8013 } else {
8014 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8015 __ andn(dest.AsRegisterPairLow<Register>(),
8016 first.AsRegisterPairLow<Register>(),
8017 second.AsRegisterPairLow<Register>());
8018 __ andn(dest.AsRegisterPairHigh<Register>(),
8019 first.AsRegisterPairHigh<Register>(),
8020 second.AsRegisterPairHigh<Register>());
8021 }
8022 }
8023
8024 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8025 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8026 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8027 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8028 locations->SetInAt(0, Location::RequiresRegister());
8029 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8030 }
8031
8032 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8033 HX86MaskOrResetLeastSetBit* instruction) {
8034 LocationSummary* locations = instruction->GetLocations();
8035 Location src = locations->InAt(0);
8036 Location dest = locations->Out();
8037 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
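  // kAnd maps to BLSR, which clears the lowest set bit (x & (x - 1)); kXor maps to BLSMSK,
  // which produces a mask up to and including the lowest set bit (x ^ (x - 1)).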
8038 switch (instruction->GetOpKind()) {
8039 case HInstruction::kAnd:
8040 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8041 break;
8042 case HInstruction::kXor:
8043 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8044 break;
8045 default:
8046 LOG(FATAL) << "Unreachable";
8047 }
8048 }
8049
8050 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8051 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8052 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8053
8054 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8055 LocationSummary* locations =
8056 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8057 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8058 || instruction->GetResultType() == DataType::Type::kInt64);
8059 locations->SetInAt(0, Location::RequiresRegister());
8060 locations->SetInAt(1, Location::Any());
8061 locations->SetOut(Location::SameAsFirstInput());
8062 }
8063
8064 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8065 HandleBitwiseOperation(instruction);
8066 }
8067
8068 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8069 HandleBitwiseOperation(instruction);
8070 }
8071
8072 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8073 HandleBitwiseOperation(instruction);
8074 }
8075
8076 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8077 LocationSummary* locations = instruction->GetLocations();
8078 Location first = locations->InAt(0);
8079 Location second = locations->InAt(1);
8080 DCHECK(first.Equals(locations->Out()));
8081
8082 if (instruction->GetResultType() == DataType::Type::kInt32) {
8083 if (second.IsRegister()) {
8084 if (instruction->IsAnd()) {
8085 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8086 } else if (instruction->IsOr()) {
8087 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8088 } else {
8089 DCHECK(instruction->IsXor());
8090 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8091 }
8092 } else if (second.IsConstant()) {
8093 if (instruction->IsAnd()) {
8094 __ andl(first.AsRegister<Register>(),
8095 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8096 } else if (instruction->IsOr()) {
8097 __ orl(first.AsRegister<Register>(),
8098 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8099 } else {
8100 DCHECK(instruction->IsXor());
8101 __ xorl(first.AsRegister<Register>(),
8102 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8103 }
8104 } else {
8105 if (instruction->IsAnd()) {
8106 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8107 } else if (instruction->IsOr()) {
8108 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8109 } else {
8110 DCHECK(instruction->IsXor());
8111 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8112 }
8113 }
8114 } else {
8115 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8116 if (second.IsRegisterPair()) {
8117 if (instruction->IsAnd()) {
8118 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8119 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8120 } else if (instruction->IsOr()) {
8121 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8122 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8123 } else {
8124 DCHECK(instruction->IsXor());
8125 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8126 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8127 }
8128 } else if (second.IsDoubleStackSlot()) {
8129 if (instruction->IsAnd()) {
8130 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8131 __ andl(first.AsRegisterPairHigh<Register>(),
8132 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8133 } else if (instruction->IsOr()) {
8134 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8135 __ orl(first.AsRegisterPairHigh<Register>(),
8136 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8137 } else {
8138 DCHECK(instruction->IsXor());
8139 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8140 __ xorl(first.AsRegisterPairHigh<Register>(),
8141 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8142 }
8143 } else {
8144 DCHECK(second.IsConstant()) << second;
8145 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8146 int32_t low_value = Low32Bits(value);
8147 int32_t high_value = High32Bits(value);
8148 Immediate low(low_value);
8149 Immediate high(high_value);
8150 Register first_low = first.AsRegisterPairLow<Register>();
8151 Register first_high = first.AsRegisterPairHigh<Register>();
8152 if (instruction->IsAnd()) {
8153 if (low_value == 0) {
8154 __ xorl(first_low, first_low);
8155 } else if (low_value != -1) {
8156 __ andl(first_low, low);
8157 }
8158 if (high_value == 0) {
8159 __ xorl(first_high, first_high);
8160 } else if (high_value != -1) {
8161 __ andl(first_high, high);
8162 }
8163 } else if (instruction->IsOr()) {
8164 if (low_value != 0) {
8165 __ orl(first_low, low);
8166 }
8167 if (high_value != 0) {
8168 __ orl(first_high, high);
8169 }
8170 } else {
8171 DCHECK(instruction->IsXor());
8172 if (low_value != 0) {
8173 __ xorl(first_low, low);
8174 }
8175 if (high_value != 0) {
8176 __ xorl(first_high, high);
8177 }
8178 }
8179 }
8180 }
8181 }
8182
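// Loads the reference field at `offset` from the object currently held in `out`, overwriting
// `out` with the result and emitting a read barrier when required. `maybe_temp` is only used by
// the non-Baker slow-path read barrier, which needs the original object reference.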
void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, out_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      __ movl(maybe_temp.AsRegister<Register>(), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ movl(out_reg, Address(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ movl(out_reg, Address(out_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  Register obj_reg = obj.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ movl(out_reg, Address(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ movl(out_reg, Address(obj_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  Register root_reg = root.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
          instruction, root, /* unpoison_ref_before_marking= */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
      const int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
      // The entrypoint is null when the GC is not marking.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = address
      __ leal(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *address
    __ movl(root_reg, address);
    if (fixup_label != nullptr) {
      __ Bind(fixup_label);
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t offset,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Address src(obj, offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t data_offset,
                                                             Location index,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
                                               Location out,
                                               Location ref,
                                               Location obj,
                                               uint32_t offset,
                                               Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
                                                    Location obj,
                                                    uint32_t offset,
                                                    Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<Register>());
  }
}

void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

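// Emits a chain of cmp/jcc pairs for a packed switch, handling two case values per compare where
// possible, and jumps to `default_block` for out-of-range values unless it is the next block.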
void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps.
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // There are an odd number of entries. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}

void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();

  GenPackedSwitchWithCompares(value_reg,
                              lower_bound,
                              num_entries,
                              switch_instr->GetBlock(),
                              switch_instr->GetDefaultBlock());
}

void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());

  // Constant area pointer.
  locations->SetInAt(1, Location::RequiresRegister());

  // And the temporary we need.
  locations->AddTemp(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    GenPackedSwitchWithCompares(value_reg,
                                lower_bound,
                                num_entries,
                                switch_instr->GetBlock(),
                                default_block);
    return;
  }

  // Optimizing has a jump area.
  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
  Register constant_area = locations->InAt(1).AsRegister<Register>();

  // Remove the bias, if needed.
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg, -lower_bound));
    value_reg = temp_reg;
  }

  // Is the value in range?
  DCHECK_GE(num_entries, 1u);
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load (target-constant_area) from the jump table, indexing by the value.
  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));

  // Compute the actual target address by adding in constant_area.
  __ addl(temp_reg, constant_area);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

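// Materializes the current code address in a register with a call/pop pair, as 32-bit x86 has no
// PC-relative addressing; the recorded offset lets constant area references be fixed up later.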
void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations = insn->GetLocations();
  Register reg = locations->Out().AsRegister<Register>();

  // Generate call to next instruction.
  Label next_instruction;
  __ call(&next_instruction);
  __ Bind(&next_instruction);

  // Remember this offset for later use with constant area.
  codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());

  // Grab the return address off the stack.
  __ popl(reg);
}

void LocationsBuilderX86::VisitX86LoadFromConstantTable(
    HX86LoadFromConstantTable* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));

  // If we don't need to be materialized, we only need the inputs to be set.
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetOut(Location::RequiresFpuRegister());
      break;

    case DataType::Type::kInt32:
      locations->SetOut(Location::RequiresRegister());
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = insn->GetLocations();
  Location out = locations->Out();
  Register const_area = locations->InAt(0).AsRegister<Register>();
  HConstant* value = insn->GetConstant();

  switch (insn->GetType()) {
    case DataType::Type::kFloat32:
      __ movss(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralFloatAddress(
                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kFloat64:
      __ movsd(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralDoubleAddress(
                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case DataType::Type::kInt32:
      __ movl(out.AsRegister<Register>(),
              codegen_->LiteralInt32Address(
                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86& codegen,
           HX86ComputeBaseMethodAddress* base_method_address,
           size_t offset)
      : codegen_(&codegen),
        base_method_address_(base_method_address),
        offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86* codegen_;
  HX86ComputeBaseMethodAddress* base_method_address_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. The place to patch is the
    // last 4 bytes of the instruction.
    // The value to patch is the distance from the offset in the constant area
    // to the address computed by the HX86ComputeBaseMethodAddress instruction.
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position =
        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  int32_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
        switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // The label values in the jump table are computed relative to the
    // instruction addressing the constant area.
    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);

    // Populate the jump table with the correct values for the jump table.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - relative_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HX86PackedSwitch* switch_instr_;
};

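// Emits the constant area (literals and jump tables) after the method code, then runs the common
// CodeGenerator finalization.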
void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86Assembler* assembler = GetAssembler();

  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
    // byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

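// The Literal*Address helpers below add a value to the constant area and return an address with a
// placeholder displacement; the attached RIPFixup patches in the real offset once the constant
// area position is known.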
Address CodeGeneratorX86::LiteralDoubleAddress(double v,
                                               HX86ComputeBaseMethodAddress* method_base,
                                               Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralFloatAddress(float v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
  return Address(reg, kPlaceholder32BitOffset, fixup);
}

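// Small helpers that pick shorter encodings for the common zero case: xor to load zero and test
// to compare against zero.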
void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
  Register lhs_reg = lhs.AsRegister<Register>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
}

Address CodeGeneratorX86::ArrayAddress(Register obj,
                                       Location index,
                                       ScaleFactor scale,
                                       uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<Register>(), scale, data_offset);
}

Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                           Register reg,
                                           Register value) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);

  // We want a scaled address, as we are extracting the correct offset from the table.
  return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
}

// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
  if (!target.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
  if (target.Equals(return_loc)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  // with the else branch.
  if (type == DataType::Type::kInt64) {
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  } else {
    // Let the parallel move resolver take care of all of this.
    HParallelMove parallel_move(GetGraph()->GetAllocator());
    parallel_move.AddMove(return_loc, target, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

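// Patches a JIT root use: writes the address of the root table entry as a 32-bit value at the
// code position recorded by `info.label`.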
void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
                                       const uint8_t* roots_data,
                                       const PatchInfo<Label>& info,
                                       uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                   ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                           ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86
}  // namespace art