1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_utils.h"
31 #include "intrinsics_x86.h"
32 #include "jit/profiling_info.h"
33 #include "linker/linker_patch.h"
34 #include "lock_word.h"
35 #include "mirror/array-inl.h"
36 #include "mirror/class-inl.h"
37 #include "mirror/var_handle.h"
38 #include "optimizing/nodes.h"
39 #include "scoped_thread_state_change-inl.h"
40 #include "thread.h"
41 #include "utils/assembler.h"
42 #include "utils/stack_checks.h"
43 #include "utils/x86/assembler_x86.h"
44 #include "utils/x86/managed_register_x86.h"
45
46 namespace art HIDDEN {
47
48 template<class MirrorType>
49 class GcRoot;
50
51 namespace x86 {
52
53 static constexpr int kCurrentMethodStackOffset = 0;
54 static constexpr Register kMethodRegisterArgument = EAX;
55 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
56
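// 0x400 is the C2 flag (bit 10) of the x87 FPU status word; FPREM/FPREM1 set it while the
// partial remainder computation is still incomplete, which is what fprem-based remainder
// code tests for.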
57 static constexpr int kC2ConditionMask = 0x400;
58
59 static constexpr int kFakeReturnRegister = Register(8);
60
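// Canonical quiet-NaN bit patterns for double and float (sign 0, all exponent bits set,
// most significant fraction bit set).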
61 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
62 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
63
64 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
65 InvokeRuntimeCallingConvention calling_convention;
66 RegisterSet caller_saves = RegisterSet::Empty();
67 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
68 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
69 // that the kPrimNot result register is the same as the first argument register.
70 return caller_saves;
71 }
72
73 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
74 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
75 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
76
77 class NullCheckSlowPathX86 : public SlowPathCode {
78 public:
79 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
80
81 void EmitNativeCode(CodeGenerator* codegen) override {
82 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
83 __ Bind(GetEntryLabel());
84 if (instruction_->CanThrowIntoCatchBlock()) {
85 // Live registers will be restored in the catch block if caught.
86 SaveLiveRegisters(codegen, instruction_->GetLocations());
87 }
88 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
89 instruction_,
90 instruction_->GetDexPc(),
91 this);
92 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
93 }
94
95 bool IsFatal() const override { return true; }
96
97 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
98
99 private:
100 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
101 };
102
103 class DivZeroCheckSlowPathX86 : public SlowPathCode {
104 public:
105 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
106
107 void EmitNativeCode(CodeGenerator* codegen) override {
108 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
109 __ Bind(GetEntryLabel());
110 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
111 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
112 }
113
114 bool IsFatal() const override { return true; }
115
116 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
117
118 private:
119 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
120 };
121
122 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
123 public:
124 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
125 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
126
127 void EmitNativeCode(CodeGenerator* codegen) override {
128 __ Bind(GetEntryLabel());
129 if (is_div_) {
130 __ negl(reg_);
131 } else {
132 __ movl(reg_, Immediate(0));
133 }
134 __ jmp(GetExitLabel());
135 }
136
137 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
138
139 private:
140 Register reg_;
141 bool is_div_;
142 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
143 };
144
145 class BoundsCheckSlowPathX86 : public SlowPathCode {
146 public:
147 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
148
149 void EmitNativeCode(CodeGenerator* codegen) override {
150 LocationSummary* locations = instruction_->GetLocations();
151 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
152 __ Bind(GetEntryLabel());
153 if (instruction_->CanThrowIntoCatchBlock()) {
154 // Live registers will be restored in the catch block if caught.
155 SaveLiveRegisters(codegen, locations);
156 }
157
158 Location index_loc = locations->InAt(0);
159 Location length_loc = locations->InAt(1);
160 InvokeRuntimeCallingConvention calling_convention;
161 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
162 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
163
164 // Are we using an array length from memory?
165 if (!length_loc.IsValid()) {
166 DCHECK(instruction_->InputAt(1)->IsArrayLength());
167 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
168 DCHECK(array_length->IsEmittedAtUseSite());
169 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
170 Location array_loc = array_length->GetLocations()->InAt(0);
171 if (!index_loc.Equals(length_arg)) {
172 // The index is not clobbered by loading the length directly to `length_arg`.
173 __ movl(length_arg.AsRegister<Register>(),
174 Address(array_loc.AsRegister<Register>(), len_offset));
175 x86_codegen->Move32(index_arg, index_loc);
176 } else if (!array_loc.Equals(index_arg)) {
177 // The array reference is not clobbered by the index move.
178 x86_codegen->Move32(index_arg, index_loc);
179 __ movl(length_arg.AsRegister<Register>(),
180 Address(array_loc.AsRegister<Register>(), len_offset));
181 } else {
182 // We do not have a temporary we could use, so swap the registers using the
183 // parallel move resolver and replace the array with the length afterwards.
184 codegen->EmitParallelMoves(
185 index_loc,
186 index_arg,
187 DataType::Type::kInt32,
188 array_loc,
189 length_arg,
190 DataType::Type::kReference);
191 __ movl(length_arg.AsRegister<Register>(),
192 Address(length_arg.AsRegister<Register>(), len_offset));
193 }
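// With string compression enabled, the String count field packs the character count in the
// upper 31 bits and the compression flag in bit 0, so the real length is `count >> 1`.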
194 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
195 __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
196 }
197 } else {
198 // We're moving two locations to locations that could overlap,
199 // so we need a parallel move resolver.
200 codegen->EmitParallelMoves(
201 index_loc,
202 index_arg,
203 DataType::Type::kInt32,
204 length_loc,
205 length_arg,
206 DataType::Type::kInt32);
207 }
208
209 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
210 ? kQuickThrowStringBounds
211 : kQuickThrowArrayBounds;
212 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
213 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
214 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
215 }
216
217 bool IsFatal() const override { return true; }
218
219 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
220
221 private:
222 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
223 };
224
225 class SuspendCheckSlowPathX86 : public SlowPathCode {
226 public:
227 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
228 : SlowPathCode(instruction), successor_(successor) {}
229
230 void EmitNativeCode(CodeGenerator* codegen) override {
231 LocationSummary* locations = instruction_->GetLocations();
232 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
233 __ Bind(GetEntryLabel());
234 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
235 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
236 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
237 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
238 if (successor_ == nullptr) {
239 __ jmp(GetReturnLabel());
240 } else {
241 __ jmp(x86_codegen->GetLabelOf(successor_));
242 }
243 }
244
245 Label* GetReturnLabel() {
246 DCHECK(successor_ == nullptr);
247 return &return_label_;
248 }
249
250 HBasicBlock* GetSuccessor() const {
251 return successor_;
252 }
253
254 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
255
256 private:
257 HBasicBlock* const successor_;
258 Label return_label_;
259
260 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
261 };
262
263 class LoadStringSlowPathX86 : public SlowPathCode {
264 public:
265 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
266
267 void EmitNativeCode(CodeGenerator* codegen) override {
268 LocationSummary* locations = instruction_->GetLocations();
269 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
270
271 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
272 __ Bind(GetEntryLabel());
273 SaveLiveRegisters(codegen, locations);
274
275 InvokeRuntimeCallingConvention calling_convention;
276 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
277 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
278 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
279 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
280 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
281 RestoreLiveRegisters(codegen, locations);
282
283 __ jmp(GetExitLabel());
284 }
285
286 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
287
288 private:
289 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
290 };
291
292 class LoadClassSlowPathX86 : public SlowPathCode {
293 public:
294 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
295 : SlowPathCode(at), cls_(cls) {
296 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
297 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
298 }
299
300 void EmitNativeCode(CodeGenerator* codegen) override {
301 LocationSummary* locations = instruction_->GetLocations();
302 Location out = locations->Out();
303 const uint32_t dex_pc = instruction_->GetDexPc();
304 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
305 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
306
307 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
308 __ Bind(GetEntryLabel());
309 SaveLiveRegisters(codegen, locations);
310
311 InvokeRuntimeCallingConvention calling_convention;
312 if (must_resolve_type) {
313 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
314 x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
315 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
316 &cls_->GetDexFile()));
317 dex::TypeIndex type_index = cls_->GetTypeIndex();
318 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
319 if (cls_->NeedsAccessCheck()) {
320 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
321 x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
322 } else {
323 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
324 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
325 }
326 // If we also must_do_clinit, the resolved type is now in the correct register.
327 } else {
328 DCHECK(must_do_clinit);
329 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
330 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
331 }
332 if (must_do_clinit) {
333 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
334 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
335 }
336
337 // Move the class to the desired location.
338 if (out.IsValid()) {
339 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
340 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
341 }
342 RestoreLiveRegisters(codegen, locations);
343 __ jmp(GetExitLabel());
344 }
345
346 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
347
348 private:
349 // The class this slow path will load.
350 HLoadClass* const cls_;
351
352 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
353 };
354
355 class TypeCheckSlowPathX86 : public SlowPathCode {
356 public:
357 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
358 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
359
360 void EmitNativeCode(CodeGenerator* codegen) override {
361 LocationSummary* locations = instruction_->GetLocations();
362 DCHECK(instruction_->IsCheckCast()
363 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
364
365 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
366 __ Bind(GetEntryLabel());
367
368 if (kPoisonHeapReferences &&
369 instruction_->IsCheckCast() &&
370 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
371 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
372 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
373 }
374
375 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
376 SaveLiveRegisters(codegen, locations);
377 }
378
379 // We're moving two locations to locations that could overlap, so we need a parallel
380 // move resolver.
381 InvokeRuntimeCallingConvention calling_convention;
382 x86_codegen->EmitParallelMoves(locations->InAt(0),
383 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
384 DataType::Type::kReference,
385 locations->InAt(1),
386 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
387 DataType::Type::kReference);
388 if (instruction_->IsInstanceOf()) {
389 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
390 instruction_,
391 instruction_->GetDexPc(),
392 this);
393 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
394 } else {
395 DCHECK(instruction_->IsCheckCast());
396 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
397 instruction_,
398 instruction_->GetDexPc(),
399 this);
400 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
401 }
402
403 if (!is_fatal_) {
404 if (instruction_->IsInstanceOf()) {
405 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
406 }
407 RestoreLiveRegisters(codegen, locations);
408
409 __ jmp(GetExitLabel());
410 }
411 }
412
413 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
414 bool IsFatal() const override { return is_fatal_; }
415
416 private:
417 const bool is_fatal_;
418
419 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
420 };
421
422 class DeoptimizationSlowPathX86 : public SlowPathCode {
423 public:
424 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
425 : SlowPathCode(instruction) {}
426
427 void EmitNativeCode(CodeGenerator* codegen) override {
428 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
429 __ Bind(GetEntryLabel());
430 LocationSummary* locations = instruction_->GetLocations();
431 SaveLiveRegisters(codegen, locations);
432 InvokeRuntimeCallingConvention calling_convention;
433 x86_codegen->Load32BitValue(
434 calling_convention.GetRegisterAt(0),
435 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
436 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
437 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
438 }
439
440 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
441
442 private:
443 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
444 };
445
446 class ArraySetSlowPathX86 : public SlowPathCode {
447 public:
448 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
449
450 void EmitNativeCode(CodeGenerator* codegen) override {
451 LocationSummary* locations = instruction_->GetLocations();
452 __ Bind(GetEntryLabel());
453 SaveLiveRegisters(codegen, locations);
454
455 InvokeRuntimeCallingConvention calling_convention;
456 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
457 parallel_move.AddMove(
458 locations->InAt(0),
459 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
460 DataType::Type::kReference,
461 nullptr);
462 parallel_move.AddMove(
463 locations->InAt(1),
464 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
465 DataType::Type::kInt32,
466 nullptr);
467 parallel_move.AddMove(
468 locations->InAt(2),
469 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
470 DataType::Type::kReference,
471 nullptr);
472 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
473
474 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
475 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
476 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
477 RestoreLiveRegisters(codegen, locations);
478 __ jmp(GetExitLabel());
479 }
480
481 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
482
483 private:
484 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
485 };
486
487 // Slow path marking an object reference `ref` during a read
488 // barrier. The field `obj.field` in the object `obj` holding this
489 // reference does not get updated by this slow path after marking (see
490 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
491 //
492 // This means that after the execution of this slow path, `ref` will
493 // always be up-to-date, but `obj.field` may not; i.e., after the
494 // flip, `ref` will be a to-space reference, but `obj.field` will
495 // probably still be a from-space reference (unless it gets updated by
496 // another thread, or if another thread installed another object
497 // reference (different from `ref`) in `obj.field`).
498 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
499 public:
500 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
501 Location ref,
502 bool unpoison_ref_before_marking)
503 : SlowPathCode(instruction),
504 ref_(ref),
505 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
506 DCHECK(gUseReadBarrier);
507 }
508
509 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
510
511 void EmitNativeCode(CodeGenerator* codegen) override {
512 LocationSummary* locations = instruction_->GetLocations();
513 Register ref_reg = ref_.AsRegister<Register>();
514 DCHECK(locations->CanCall());
515 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
516 DCHECK(instruction_->IsInstanceFieldGet() ||
517 instruction_->IsPredicatedInstanceFieldGet() ||
518 instruction_->IsStaticFieldGet() ||
519 instruction_->IsArrayGet() ||
520 instruction_->IsArraySet() ||
521 instruction_->IsLoadClass() ||
522 instruction_->IsLoadString() ||
523 instruction_->IsInstanceOf() ||
524 instruction_->IsCheckCast() ||
525 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
526 << "Unexpected instruction in read barrier marking slow path: "
527 << instruction_->DebugName();
528
529 __ Bind(GetEntryLabel());
530 if (unpoison_ref_before_marking_) {
531 // Object* ref = ref_addr->AsMirrorPtr()
532 __ MaybeUnpoisonHeapReference(ref_reg);
533 }
534 // No need to save live registers; it's taken care of by the
535 // entrypoint. Also, there is no need to update the stack mask,
536 // as this runtime call will not trigger a garbage collection.
537 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
538 DCHECK_NE(ref_reg, ESP);
539 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
540 // "Compact" slow path, saving two moves.
541 //
542 // Instead of using the standard runtime calling convention (input
543 // and output in EAX):
544 //
545 // EAX <- ref
546 // EAX <- ReadBarrierMark(EAX)
547 // ref <- EAX
548 //
549 // we just use rX (the register containing `ref`) as input and output
550 // of a dedicated entrypoint:
551 //
552 // rX <- ReadBarrierMarkRegX(rX)
553 //
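// Each core register has a dedicated ReadBarrierMarkRegX entrypoint slot in the Thread
// object, indexed by the register number, which is why the offset below is computed from
// `ref_reg`.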
554 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
555 // This runtime call does not require a stack map.
556 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
557 __ jmp(GetExitLabel());
558 }
559
560 private:
561 // The location (register) of the marked object reference.
562 const Location ref_;
563 // Should the reference in `ref_` be unpoisoned prior to marking it?
564 const bool unpoison_ref_before_marking_;
565
566 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
567 };
568
569 // Slow path marking an object reference `ref` during a read barrier,
570 // and if needed, atomically updating the field `obj.field` in the
571 // object `obj` holding this reference after marking (contrary to
572 // ReadBarrierMarkSlowPathX86 above, which never tries to update
573 // `obj.field`).
574 //
575 // This means that after the execution of this slow path, both `ref`
576 // and `obj.field` will be up-to-date; i.e., after the flip, both will
577 // hold the same to-space reference (unless another thread installed
578 // another object reference (different from `ref`) in `obj.field`).
579 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
580 public:
581 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
582 Location ref,
583 Register obj,
584 const Address& field_addr,
585 bool unpoison_ref_before_marking,
586 Register temp)
587 : SlowPathCode(instruction),
588 ref_(ref),
589 obj_(obj),
590 field_addr_(field_addr),
591 unpoison_ref_before_marking_(unpoison_ref_before_marking),
592 temp_(temp) {
593 DCHECK(gUseReadBarrier);
594 }
595
596 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
597
598 void EmitNativeCode(CodeGenerator* codegen) override {
599 LocationSummary* locations = instruction_->GetLocations();
600 Register ref_reg = ref_.AsRegister<Register>();
601 DCHECK(locations->CanCall());
602 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
603 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
604 << "Unexpected instruction in read barrier marking and field updating slow path: "
605 << instruction_->DebugName();
606 HInvoke* invoke = instruction_->AsInvoke();
607 DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
608
609 __ Bind(GetEntryLabel());
610 if (unpoison_ref_before_marking_) {
611 // Object* ref = ref_addr->AsMirrorPtr()
612 __ MaybeUnpoisonHeapReference(ref_reg);
613 }
614
615 // Save the old (unpoisoned) reference.
616 __ movl(temp_, ref_reg);
617
618 // No need to save live registers; it's taken care of by the
619 // entrypoint. Also, there is no need to update the stack mask,
620 // as this runtime call will not trigger a garbage collection.
621 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
622 DCHECK_NE(ref_reg, ESP);
623 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
624 // "Compact" slow path, saving two moves.
625 //
626 // Instead of using the standard runtime calling convention (input
627 // and output in EAX):
628 //
629 // EAX <- ref
630 // EAX <- ReadBarrierMark(EAX)
631 // ref <- EAX
632 //
633 // we just use rX (the register containing `ref`) as input and output
634 // of a dedicated entrypoint:
635 //
636 // rX <- ReadBarrierMarkRegX(rX)
637 //
638 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
639 // This runtime call does not require a stack map.
640 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
641
642 // If the new reference is different from the old reference,
643 // update the field in the holder (`*field_addr`).
644 //
645 // Note that this field could also hold a different object, if
646 // another thread had concurrently changed it. In that case, the
647 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
648 // operation below would abort the CAS, leaving the field as-is.
649 NearLabel done;
650 __ cmpl(temp_, ref_reg);
651 __ j(kEqual, &done);
652
653 // Update the holder's field atomically. This may fail if the
654 // mutator updates it before us, but that is OK. This is achieved
655 // using a strong compare-and-set (CAS) operation with relaxed
656 // memory synchronization ordering, where the expected value is
657 // the old reference and the desired value is the new reference.
658 // This operation is implemented with a 32-bit LOCK CMPXCHG
659 // instruction, which requires the expected value (the old
660 // reference) to be in EAX. Save EAX beforehand, and move the
661 // expected value (stored in `temp_`) into EAX.
662 __ pushl(EAX);
663 __ movl(EAX, temp_);
664
665 // Convenience aliases.
666 Register base = obj_;
667 Register expected = EAX;
668 Register value = ref_reg;
669
670 bool base_equals_value = (base == value);
671 if (kPoisonHeapReferences) {
672 if (base_equals_value) {
673 // If `base` and `value` are the same register location, move
674 // `value` to a temporary register. This way, poisoning
675 // `value` won't invalidate `base`.
676 value = temp_;
677 __ movl(value, base);
678 }
679
680 // Check that the register allocator did not assign the location
681 // of `expected` (EAX) to `value` nor to `base`, so that heap
682 // poisoning (when enabled) works as intended below.
683 // - If `value` were equal to `expected`, both references would
684 // be poisoned twice, meaning they would not be poisoned at
685 // all, as heap poisoning uses address negation.
686 // - If `base` were equal to `expected`, poisoning `expected`
687 // would invalidate `base`.
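// (On x86, poisoning a heap reference negates the 32-bit value, so poisoning the same
// register twice, -(-ref), would simply restore the unpoisoned reference.)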
688 DCHECK_NE(value, expected);
689 DCHECK_NE(base, expected);
690
691 __ PoisonHeapReference(expected);
692 __ PoisonHeapReference(value);
693 }
694
695 __ LockCmpxchgl(field_addr_, value);
696
697 // If heap poisoning is enabled, we need to unpoison the values
698 // that were poisoned earlier.
699 if (kPoisonHeapReferences) {
700 if (base_equals_value) {
701 // `value` has been moved to a temporary register, no need
702 // to unpoison it.
703 } else {
704 __ UnpoisonHeapReference(value);
705 }
706 // No need to unpoison `expected` (EAX), as it will be overwritten below.
707 }
708
709 // Restore EAX.
710 __ popl(EAX);
711
712 __ Bind(&done);
713 __ jmp(GetExitLabel());
714 }
715
716 private:
717 // The location (register) of the marked object reference.
718 const Location ref_;
719 // The register containing the object holding the marked object reference field.
720 const Register obj_;
721 // The address of the marked reference field. The base of this address must be `obj_`.
722 const Address field_addr_;
723
724 // Should the reference in `ref_` be unpoisoned prior to marking it?
725 const bool unpoison_ref_before_marking_;
726
727 const Register temp_;
728
729 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
730 };
731
732 // Slow path generating a read barrier for a heap reference.
733 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
734 public:
735 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
736 Location out,
737 Location ref,
738 Location obj,
739 uint32_t offset,
740 Location index)
741 : SlowPathCode(instruction),
742 out_(out),
743 ref_(ref),
744 obj_(obj),
745 offset_(offset),
746 index_(index) {
747 DCHECK(gUseReadBarrier);
748 // If `obj` is equal to `out` or `ref`, it means the initial object
749 // has been overwritten by (or after) the heap object reference load
750 // to be instrumented, e.g.:
751 //
752 // __ movl(out, Address(out, offset));
753 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
754 //
755 // In that case, we have lost the information about the original
756 // object, and the emitted read barrier cannot work properly.
757 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
758 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
759 }
760
761 void EmitNativeCode(CodeGenerator* codegen) override {
762 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
763 LocationSummary* locations = instruction_->GetLocations();
764 Register reg_out = out_.AsRegister<Register>();
765 DCHECK(locations->CanCall());
766 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
767 DCHECK(instruction_->IsInstanceFieldGet() ||
768 instruction_->IsPredicatedInstanceFieldGet() ||
769 instruction_->IsStaticFieldGet() ||
770 instruction_->IsArrayGet() ||
771 instruction_->IsInstanceOf() ||
772 instruction_->IsCheckCast() ||
773 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
774 << "Unexpected instruction in read barrier for heap reference slow path: "
775 << instruction_->DebugName();
776
777 __ Bind(GetEntryLabel());
778 SaveLiveRegisters(codegen, locations);
779
780 // We may have to change the index's value, but as `index_` is a
781 // constant member (like other "inputs" of this slow path),
782 // introduce a copy of it, `index`.
783 Location index = index_;
784 if (index_.IsValid()) {
785 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
786 if (instruction_->IsArrayGet()) {
787 // Compute the actual memory offset and store it in `index`.
788 Register index_reg = index_.AsRegister<Register>();
789 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
790 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
791 // We are about to change the value of `index_reg` (see the
792 // calls to art::x86::X86Assembler::shll and
793 // art::x86::X86Assembler::AddImmediate below), but it has
794 // not been saved by the previous call to
795 // art::SlowPathCode::SaveLiveRegisters, as it is a
796 // callee-save register --
797 // art::SlowPathCode::SaveLiveRegisters does not consider
798 // callee-save registers, as it has been designed with the
799 // assumption that callee-save registers are supposed to be
800 // handled by the called function. So, as a callee-save
801 // register, `index_reg` _would_ eventually be saved onto
802 // the stack, but it would be too late: we would have
803 // changed its value earlier. Therefore, we manually save
804 // it here into another freely available register,
805 // `free_reg`, chosen of course among the caller-save
806 // registers (as a callee-save `free_reg` register would
807 // exhibit the same problem).
808 //
809 // Note we could have requested a temporary register from
810 // the register allocator instead; but we prefer not to, as
811 // this is a slow path, and we know we can find a
812 // caller-save register that is available.
813 Register free_reg = FindAvailableCallerSaveRegister(codegen);
814 __ movl(free_reg, index_reg);
815 index_reg = free_reg;
816 index = Location::RegisterLocation(index_reg);
817 } else {
818 // The initial register stored in `index_` has already been
819 // saved in the call to art::SlowPathCode::SaveLiveRegisters
820 // (as it is not a callee-save register), so we can freely
821 // use it.
822 }
823 // Shifting the index value contained in `index_reg` by the scale
824 // factor (2) cannot overflow in practice, as the runtime is
825 // unable to allocate object arrays with a size larger than
826 // 2^26 - 1 (that is, 2^28 - 4 bytes).
827 __ shll(index_reg, Immediate(TIMES_4));
828 static_assert(
829 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
830 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
831 __ AddImmediate(index_reg, Immediate(offset_));
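// `index_reg` now holds the complete memory offset of the element (data offset plus
// index * 4), matching the address computation used by the instrumented ArrayGet.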
832 } else {
833 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
834 // intrinsics, `index_` is not shifted by a scale factor of 2
835 // (as in the case of ArrayGet), as it is actually an offset
836 // to an object field within an object.
837 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
838 DCHECK(instruction_->GetLocations()->Intrinsified());
839 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
840 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
841 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
842 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
843 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
844 << instruction_->AsInvoke()->GetIntrinsic();
845 DCHECK_EQ(offset_, 0U);
846 DCHECK(index_.IsRegisterPair());
847 // UnsafeGet's offset location is a register pair, the low
848 // part contains the correct offset.
849 index = index_.ToLow();
850 }
851 }
852
853 // We're moving two or three locations to locations that could
854 // overlap, so we need a parallel move resolver.
855 InvokeRuntimeCallingConvention calling_convention;
856 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
857 parallel_move.AddMove(ref_,
858 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
859 DataType::Type::kReference,
860 nullptr);
861 parallel_move.AddMove(obj_,
862 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
863 DataType::Type::kReference,
864 nullptr);
865 if (index.IsValid()) {
866 parallel_move.AddMove(index,
867 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
868 DataType::Type::kInt32,
869 nullptr);
870 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
871 } else {
872 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
873 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
874 }
875 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
876 CheckEntrypointTypes<
877 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
878 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
879
880 RestoreLiveRegisters(codegen, locations);
881 __ jmp(GetExitLabel());
882 }
883
884 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
885
886 private:
887 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
888 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
889 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
890 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
891 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
892 return static_cast<Register>(i);
893 }
894 }
895 // We shall never fail to find a free caller-save register, as
896 // there are more than two core caller-save registers on x86
897 // (meaning it is possible to find one which is different from
898 // `ref` and `obj`).
899 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
900 LOG(FATAL) << "Could not find a free caller-save register";
901 UNREACHABLE();
902 }
903
904 const Location out_;
905 const Location ref_;
906 const Location obj_;
907 const uint32_t offset_;
908 // An additional location containing an index to an array.
909 // Only used for HArrayGet and the UnsafeGetObject &
910 // UnsafeGetObjectVolatile intrinsics.
911 const Location index_;
912
913 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
914 };
915
916 // Slow path generating a read barrier for a GC root.
917 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
918 public:
919 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
920 : SlowPathCode(instruction), out_(out), root_(root) {
921 DCHECK(gUseReadBarrier);
922 }
923
924 void EmitNativeCode(CodeGenerator* codegen) override {
925 LocationSummary* locations = instruction_->GetLocations();
926 Register reg_out = out_.AsRegister<Register>();
927 DCHECK(locations->CanCall());
928 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
929 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
930 << "Unexpected instruction in read barrier for GC root slow path: "
931 << instruction_->DebugName();
932
933 __ Bind(GetEntryLabel());
934 SaveLiveRegisters(codegen, locations);
935
936 InvokeRuntimeCallingConvention calling_convention;
937 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
938 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
939 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
940 instruction_,
941 instruction_->GetDexPc(),
942 this);
943 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
944 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
945
946 RestoreLiveRegisters(codegen, locations);
947 __ jmp(GetExitLabel());
948 }
949
950 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
951
952 private:
953 const Location out_;
954 const Location root_;
955
956 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
957 };
958
959 class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
960 public:
961 explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
962
963 void EmitNativeCode(CodeGenerator* codegen) override {
964 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
965 LocationSummary* locations = instruction_->GetLocations();
966 QuickEntrypointEnum entry_point =
967 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
968 __ Bind(GetEntryLabel());
969 SaveLiveRegisters(codegen, locations);
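// Method exit hooks additionally pass the compiled frame size to the runtime in EBX (set
// below), presumably so the entrypoint can locate values in the caller's frame.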
970 if (instruction_->IsMethodExitHook()) {
971 __ movl(EBX, Immediate(codegen->GetFrameSize()));
972 }
973 x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
974 RestoreLiveRegisters(codegen, locations);
975 __ jmp(GetExitLabel());
976 }
977
978 const char* GetDescription() const override {
979 return "MethodEntryExitHooksSlowPath";
980 }
981
982 private:
983 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
984 };
985
986 class CompileOptimizedSlowPathX86 : public SlowPathCode {
987 public:
988 CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {}
989
990 void EmitNativeCode(CodeGenerator* codegen) override {
991 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
992 __ Bind(GetEntryLabel());
993 x86_codegen->GenerateInvokeRuntime(
994 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
995 __ jmp(GetExitLabel());
996 }
997
998 const char* GetDescription() const override {
999 return "CompileOptimizedSlowPath";
1000 }
1001
1002 private:
1003 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
1004 };
1005
1006 #undef __
1007 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1008 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
1009
1010 inline Condition X86Condition(IfCondition cond) {
1011 switch (cond) {
1012 case kCondEQ: return kEqual;
1013 case kCondNE: return kNotEqual;
1014 case kCondLT: return kLess;
1015 case kCondLE: return kLessEqual;
1016 case kCondGT: return kGreater;
1017 case kCondGE: return kGreaterEqual;
1018 case kCondB: return kBelow;
1019 case kCondBE: return kBelowEqual;
1020 case kCondA: return kAbove;
1021 case kCondAE: return kAboveEqual;
1022 }
1023 LOG(FATAL) << "Unreachable";
1024 UNREACHABLE();
1025 }
1026
1027 // Maps signed condition to unsigned condition and FP condition to x86 name.
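// The unsigned names are also correct for floating-point comparisons: ucomiss/ucomisd report
// their result only through ZF, PF and CF (SF and OF are cleared), exactly as an unsigned
// integer compare would.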
1028 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
1029 switch (cond) {
1030 case kCondEQ: return kEqual;
1031 case kCondNE: return kNotEqual;
1032 // Signed to unsigned, and FP to x86 name.
1033 case kCondLT: return kBelow;
1034 case kCondLE: return kBelowEqual;
1035 case kCondGT: return kAbove;
1036 case kCondGE: return kAboveEqual;
1037 // Unsigned conditions remain unchanged.
1038 case kCondB: return kBelow;
1039 case kCondBE: return kBelowEqual;
1040 case kCondA: return kAbove;
1041 case kCondAE: return kAboveEqual;
1042 }
1043 LOG(FATAL) << "Unreachable";
1044 UNREACHABLE();
1045 }
1046
1047 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
1048 stream << Register(reg);
1049 }
1050
1051 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1052 stream << XmmRegister(reg);
1053 }
1054
1055 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
1056 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
1057 }
1058
1059 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1060 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
1061 return kX86WordSize;
1062 }
1063
1064 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1065 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
1066 return kX86WordSize;
1067 }
1068
1069 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1070 if (GetGraph()->HasSIMD()) {
1071 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
1072 } else {
1073 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
1074 }
1075 return GetSlowPathFPWidth();
1076 }
1077
1078 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1079 if (GetGraph()->HasSIMD()) {
1080 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
1081 } else {
1082 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1083 }
1084 return GetSlowPathFPWidth();
1085 }
1086
1087 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1088 HInstruction* instruction,
1089 uint32_t dex_pc,
1090 SlowPathCode* slow_path) {
1091 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1092 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1093 if (EntrypointRequiresStackMap(entrypoint)) {
1094 RecordPcInfo(instruction, dex_pc, slow_path);
1095 }
1096 }
1097
1098 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1099 HInstruction* instruction,
1100 SlowPathCode* slow_path) {
1101 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1102 GenerateInvokeRuntime(entry_point_offset);
1103 }
1104
1105 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1106 __ fs()->call(Address::Absolute(entry_point_offset));
1107 }
1108
1109 namespace detail {
1110 // Mark which intrinsics we don't have handcrafted code for.
1111 template <Intrinsics T>
1112 struct IsUnimplemented {
1113 bool is_unimplemented = false;
1114 };
1115
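// The primary template above defaults to false; TRUE_OVERRIDE specializes it to true for every
// intrinsic in UNIMPLEMENTED_INTRINSIC_LIST_X86. kIsIntrinsicUnimplemented below is then built
// by instantiating IsUnimplemented for each entry of INTRINSICS_LIST, producing a bool table
// indexed by the Intrinsics enum (with kNone first).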
1116 #define TRUE_OVERRIDE(Name) \
1117 template <> \
1118 struct IsUnimplemented<Intrinsics::k##Name> { \
1119 bool is_unimplemented = true; \
1120 };
1121 UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
1122 #undef TRUE_OVERRIDE
1123
1124 #include "intrinsics_list.h"
1125 static constexpr bool kIsIntrinsicUnimplemented[] = {
1126 false, // kNone
1127 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1128 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1129 INTRINSICS_LIST(IS_UNIMPLEMENTED)
1130 #undef IS_UNIMPLEMENTED
1131 };
1132 #undef INTRINSICS_LIST
1133
1134 } // namespace detail
1135
1136 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1137 const CompilerOptions& compiler_options,
1138 OptimizingCompilerStats* stats)
1139 : CodeGenerator(graph,
1140 kNumberOfCpuRegisters,
1141 kNumberOfXmmRegisters,
1142 kNumberOfRegisterPairs,
1143 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1144 arraysize(kCoreCalleeSaves))
1145 | (1 << kFakeReturnRegister),
1146 0,
1147 compiler_options,
1148 stats,
1149 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1150 block_labels_(nullptr),
1151 location_builder_(graph, this),
1152 instruction_visitor_(graph, this),
1153 move_resolver_(graph->GetAllocator(), this),
1154 assembler_(graph->GetAllocator(),
1155 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
1156 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1157 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1158 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1159 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1160 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1161 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1162 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1163 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1164 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1165 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1166 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1167 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1168 constant_area_start_(-1),
1169 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1170 method_address_offset_(std::less<uint32_t>(),
1171 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1172 // Use a fake return address register to mimic Quick.
1173 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1174 }
1175
1176 void CodeGeneratorX86::SetupBlockedRegisters() const {
1177 // Stack register is always reserved.
1178 blocked_core_registers_[ESP] = true;
1179 }
1180
1181 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1182 : InstructionCodeGenerator(graph, codegen),
1183 assembler_(codegen->GetAssembler()),
1184 codegen_(codegen) {}
1185
1186 static dwarf::Reg DWARFReg(Register reg) {
1187 return dwarf::Reg::X86Core(static_cast<int>(reg));
1188 }
1189
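// Routes the return value into the registers required by the managed calling convention on
// x86: EAX for 32-bit values and references, the EAX:EDX pair for 64-bit values, and XMM0
// for floating point.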
1190 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1191 switch (ret->InputAt(0)->GetType()) {
1192 case DataType::Type::kReference:
1193 case DataType::Type::kBool:
1194 case DataType::Type::kUint8:
1195 case DataType::Type::kInt8:
1196 case DataType::Type::kUint16:
1197 case DataType::Type::kInt16:
1198 case DataType::Type::kInt32:
1199 locations->SetInAt(0, Location::RegisterLocation(EAX));
1200 break;
1201
1202 case DataType::Type::kInt64:
1203 locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1204 break;
1205
1206 case DataType::Type::kFloat32:
1207 case DataType::Type::kFloat64:
1208 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1209 break;
1210
1211 case DataType::Type::kVoid:
1212 locations->SetInAt(0, Location::NoLocation());
1213 break;
1214
1215 default:
1216 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1217 }
1218 }
1219
1220 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1221 LocationSummary* locations = new (GetGraph()->GetAllocator())
1222 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1223 SetInForReturnValue(method_hook, locations);
1224 }
1225
1226 void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
1227 SlowPathCode* slow_path =
1228 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
1229 codegen_->AddSlowPath(slow_path);
1230
1231 if (instruction->IsMethodExitHook()) {
1232 // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
1233 // would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is faster to
1234 // check whether the flag is simply non-zero. The kCHA bit isn't used in debuggable runtimes, as
1235 // CHA optimization is disabled there. The other bit is used when this method itself requires a
1236 // deoptimization due to redefinition. So it is safe to just check for a non-zero value here.
1237 __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1238 __ j(kNotEqual, slow_path->GetEntryLabel());
1239 }
1240
1241 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1242 MemberOffset offset = instruction->IsMethodExitHook() ?
1243 instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1244 instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1245 __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
1246 __ j(kNotEqual, slow_path->GetEntryLabel());
1247 __ Bind(slow_path->GetExitLabel());
1248 }
1249
1250 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1251 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1252 DCHECK(codegen_->RequiresCurrentMethod());
1253 GenerateMethodEntryExitHook(instruction);
1254 }
1255
1256 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1257 new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1258 }
1259
1260 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1261 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1262 DCHECK(codegen_->RequiresCurrentMethod());
1263 GenerateMethodEntryExitHook(instruction);
1264 }
1265
1266 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1267 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1268 Register reg = EAX;
1269 if (is_frame_entry) {
1270 reg = kMethodRegisterArgument;
1271 } else {
1272 __ pushl(EAX);
1273 __ cfi().AdjustCFAOffset(4);
1274 __ movl(EAX, Address(ESP, kX86WordSize));
1275 }
1276 NearLabel overflow;
1277 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1278 Immediate(interpreter::kNterpHotnessValue));
1279 __ j(kEqual, &overflow);
1280 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1281 __ Bind(&overflow);
1282 if (!is_frame_entry) {
1283 __ popl(EAX);
1284 __ cfi().AdjustCFAOffset(-4);
1285 }
1286 }
1287
1288 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1289 SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86();
1290 AddSlowPath(slow_path);
1291 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1292 DCHECK(info != nullptr);
1293 uint32_t address = reinterpret_cast32<uint32_t>(info) +
1294 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1295 DCHECK(!HasEmptyFrame());
1296 // With multiple threads, this can overflow. This is OK, we will eventually get to see
1297 // it reaching 0. Also, at this point we have no register available to look
1298 // at the counter directly.
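// The 16-bit baseline hotness counter is decremented in place; `addw` sets ZF when the
// counter reaches zero, which is when we branch to the CompileOptimized slow path.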
1299 __ addw(Address::Absolute(address), Immediate(-1));
1300 __ j(kEqual, slow_path->GetEntryLabel());
1301 __ Bind(slow_path->GetExitLabel());
1302 }
1303 }
1304
1305 void CodeGeneratorX86::GenerateFrameEntry() {
1306 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1307
1308 // Check if we need to generate the clinit check. We will jump to the
1309 // resolution stub if the class is not initialized and the executing thread is
1310 // not the thread initializing it.
1311 // We do this before constructing the frame to get the correct stack trace if
1312 // an exception is thrown.
1313 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1314 NearLabel continue_execution, resolution;
1315     // We'll use EBP as a temporary.
1316 __ pushl(EBP);
1317 // Check if we're visibly initialized.
1318
1319 // We don't emit a read barrier here to save on code size. We rely on the
1320 // resolution trampoline to do a suspend check before re-entering this code.
1321 __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
1322 __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value));
1323 __ j(kAboveEqual, &continue_execution);
1324
1325 // Check if we're initializing and the thread initializing is the one
1326 // executing the code.
1327 __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value));
1328 __ j(kBelow, &resolution);
1329
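    // Compare the id of the thread initializing the class against the current thread's id,
    // read from the thread-local storage area via the fs segment.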
1330 __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1331 __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
1332 __ j(kEqual, &continue_execution);
1333 __ Bind(&resolution);
1334
1335 __ popl(EBP);
1336 // Jump to the resolution stub.
1337 ThreadOffset32 entrypoint_offset =
1338 GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
1339 __ fs()->jmp(Address::Absolute(entrypoint_offset));
1340
1341 __ Bind(&continue_execution);
1342 __ popl(EBP);
1343 }
1344
1345 __ Bind(&frame_entry_label_);
1346 bool skip_overflow_check =
1347 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1348 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1349
1350 if (!skip_overflow_check) {
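    // Implicit stack overflow check: probe below the reserved stack region so that a real
    // overflow faults here and the runtime's fault handler can throw StackOverflowError.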
1351 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1352 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1353 RecordPcInfo(nullptr, 0);
1354 }
1355
1356 if (!HasEmptyFrame()) {
1357 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1358 Register reg = kCoreCalleeSaves[i];
1359 if (allocated_registers_.ContainsCoreRegister(reg)) {
1360 __ pushl(reg);
1361 __ cfi().AdjustCFAOffset(kX86WordSize);
1362 __ cfi().RelOffset(DWARFReg(reg), 0);
1363 }
1364 }
1365
1366 int adjust = GetFrameSize() - FrameEntrySpillSize();
1367 IncreaseFrame(adjust);
1368 // Save the current method if we need it. Note that we do not
1369 // do this in HCurrentMethod, as the instruction might have been removed
1370 // in the SSA graph.
1371 if (RequiresCurrentMethod()) {
1372 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1373 }
1374
1375 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1376 // Initialize should_deoptimize flag to 0.
1377 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1378 }
1379 }
1380
1381 MaybeIncrementHotness(/* is_frame_entry= */ true);
1382 }
1383
1384 void CodeGeneratorX86::GenerateFrameExit() {
1385 __ cfi().RememberState();
1386 if (!HasEmptyFrame()) {
1387 int adjust = GetFrameSize() - FrameEntrySpillSize();
1388 DecreaseFrame(adjust);
1389
1390 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1391 Register reg = kCoreCalleeSaves[i];
1392 if (allocated_registers_.ContainsCoreRegister(reg)) {
1393 __ popl(reg);
1394 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1395 __ cfi().Restore(DWARFReg(reg));
1396 }
1397 }
1398 }
1399 __ ret();
1400 __ cfi().RestoreState();
1401 __ cfi().DefCFAOffset(GetFrameSize());
1402 }
1403
1404 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1405 __ Bind(GetLabelOf(block));
1406 }
1407
1408 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1409 switch (type) {
1410 case DataType::Type::kReference:
1411 case DataType::Type::kBool:
1412 case DataType::Type::kUint8:
1413 case DataType::Type::kInt8:
1414 case DataType::Type::kUint16:
1415 case DataType::Type::kInt16:
1416 case DataType::Type::kUint32:
1417 case DataType::Type::kInt32:
1418 return Location::RegisterLocation(EAX);
1419
1420 case DataType::Type::kUint64:
1421 case DataType::Type::kInt64:
1422 return Location::RegisterPairLocation(EAX, EDX);
1423
1424 case DataType::Type::kVoid:
1425 return Location::NoLocation();
1426
1427 case DataType::Type::kFloat64:
1428 case DataType::Type::kFloat32:
1429 return Location::FpuRegisterLocation(XMM0);
1430 }
1431
1432 UNREACHABLE();
1433 }
1434
1435 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1436 return Location::RegisterLocation(kMethodRegisterArgument);
1437 }
1438
1439 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1440 switch (type) {
1441 case DataType::Type::kReference:
1442 case DataType::Type::kBool:
1443 case DataType::Type::kUint8:
1444 case DataType::Type::kInt8:
1445 case DataType::Type::kUint16:
1446 case DataType::Type::kInt16:
1447 case DataType::Type::kInt32: {
1448 uint32_t index = gp_index_++;
1449 stack_index_++;
1450 if (index < calling_convention.GetNumberOfRegisters()) {
1451 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1452 } else {
1453 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1454 }
1455 }
1456
1457 case DataType::Type::kInt64: {
1458 uint32_t index = gp_index_;
1459 gp_index_ += 2;
1460 stack_index_ += 2;
1461 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1462 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1463 calling_convention.GetRegisterPairAt(index));
1464 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1465 } else {
1466 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1467 }
1468 }
1469
1470 case DataType::Type::kFloat32: {
1471 uint32_t index = float_index_++;
1472 stack_index_++;
1473 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1474 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1475 } else {
1476 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1477 }
1478 }
1479
1480 case DataType::Type::kFloat64: {
1481 uint32_t index = float_index_++;
1482 stack_index_ += 2;
1483 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1484 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1485 } else {
1486 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1487 }
1488 }
1489
1490 case DataType::Type::kUint32:
1491 case DataType::Type::kUint64:
1492 case DataType::Type::kVoid:
1493 LOG(FATAL) << "Unexpected parameter type " << type;
1494 UNREACHABLE();
1495 }
1496 return Location::NoLocation();
1497 }
1498
1499 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1500 DCHECK_NE(type, DataType::Type::kReference);
1501
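  // With the @CriticalNative calling convention on x86 there are no register arguments:
  // every parameter is assigned a slot on the native stack.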
1502 Location location;
1503 if (DataType::Is64BitType(type)) {
1504 location = Location::DoubleStackSlot(stack_offset_);
1505 stack_offset_ += 2 * kFramePointerSize;
1506 } else {
1507 location = Location::StackSlot(stack_offset_);
1508 stack_offset_ += kFramePointerSize;
1509 }
1510 if (for_register_allocation_) {
1511 location = Location::Any();
1512 }
1513 return location;
1514 }
1515
1516 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1517 // We perform conversion to the managed ABI return register after the call if needed.
1518 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1519 return dex_calling_convention.GetReturnLocation(type);
1520 }
1521
1522 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1523 // Pass the method in the hidden argument EAX.
1524 return Location::RegisterLocation(EAX);
1525 }
1526
1527 void CodeGeneratorX86::Move32(Location destination, Location source) {
1528 if (source.Equals(destination)) {
1529 return;
1530 }
1531 if (destination.IsRegister()) {
1532 if (source.IsRegister()) {
1533 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1534 } else if (source.IsFpuRegister()) {
1535 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1536 } else if (source.IsConstant()) {
1537 int32_t value = GetInt32ValueOf(source.GetConstant());
1538 __ movl(destination.AsRegister<Register>(), Immediate(value));
1539 } else {
1540 DCHECK(source.IsStackSlot());
1541 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1542 }
1543 } else if (destination.IsFpuRegister()) {
1544 if (source.IsRegister()) {
1545 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1546 } else if (source.IsFpuRegister()) {
1547 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1548 } else {
1549 DCHECK(source.IsStackSlot());
1550 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1551 }
1552 } else {
1553 DCHECK(destination.IsStackSlot()) << destination;
1554 if (source.IsRegister()) {
1555 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1556 } else if (source.IsFpuRegister()) {
1557 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1558 } else if (source.IsConstant()) {
1559 HConstant* constant = source.GetConstant();
1560 int32_t value = GetInt32ValueOf(constant);
1561 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1562 } else {
1563 DCHECK(source.IsStackSlot());
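      // Memory-to-memory move with no free register: push the source slot and pop it
      // straight into the destination slot.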
1564 __ pushl(Address(ESP, source.GetStackIndex()));
1565 __ popl(Address(ESP, destination.GetStackIndex()));
1566 }
1567 }
1568 }
1569
1570 void CodeGeneratorX86::Move64(Location destination, Location source) {
1571 if (source.Equals(destination)) {
1572 return;
1573 }
1574 if (destination.IsRegisterPair()) {
1575 if (source.IsRegisterPair()) {
1576 EmitParallelMoves(
1577 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1578 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1579 DataType::Type::kInt32,
1580 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1581 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1582 DataType::Type::kInt32);
1583 } else if (source.IsFpuRegister()) {
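      // Split the 64-bit XMM value into the core register pair: low half with movd, then a
      // logical right shift by 32 to expose the high half. Note this shifts src_reg in place.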
1584 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1585 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1586 __ psrlq(src_reg, Immediate(32));
1587 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1588 } else {
1589 // No conflict possible, so just do the moves.
1590 DCHECK(source.IsDoubleStackSlot());
1591 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1592 __ movl(destination.AsRegisterPairHigh<Register>(),
1593 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1594 }
1595 } else if (destination.IsFpuRegister()) {
1596 if (source.IsFpuRegister()) {
1597 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1598 } else if (source.IsDoubleStackSlot()) {
1599 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1600 } else if (source.IsRegisterPair()) {
1601 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1602 // Push the 2 source registers to the stack.
1603 __ pushl(source.AsRegisterPairHigh<Register>());
1604 __ cfi().AdjustCFAOffset(elem_size);
1605 __ pushl(source.AsRegisterPairLow<Register>());
1606 __ cfi().AdjustCFAOffset(elem_size);
1607 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1608 // And remove the temporary stack space we allocated.
1609 DecreaseFrame(2 * elem_size);
1610 } else {
1611 LOG(FATAL) << "Unimplemented";
1612 }
1613 } else {
1614 DCHECK(destination.IsDoubleStackSlot()) << destination;
1615 if (source.IsRegisterPair()) {
1616 // No conflict possible, so just do the moves.
1617 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1618 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1619 source.AsRegisterPairHigh<Register>());
1620 } else if (source.IsFpuRegister()) {
1621 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1622 } else if (source.IsConstant()) {
1623 HConstant* constant = source.GetConstant();
1624 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1625 int64_t value = GetInt64ValueOf(constant);
1626 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1627 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1628 Immediate(High32Bits(value)));
1629 } else {
1630 DCHECK(source.IsDoubleStackSlot()) << source;
1631 EmitParallelMoves(
1632 Location::StackSlot(source.GetStackIndex()),
1633 Location::StackSlot(destination.GetStackIndex()),
1634 DataType::Type::kInt32,
1635 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1636 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1637 DataType::Type::kInt32);
1638 }
1639 }
1640 }
1641
1642 static Address CreateAddress(Register base,
1643 Register index = Register::kNoRegister,
1644 ScaleFactor scale = TIMES_1,
1645 int32_t disp = 0) {
1646 if (index == Register::kNoRegister) {
1647 return Address(base, disp);
1648 }
1649
1650 return Address(base, index, scale, disp);
1651 }
1652
1653 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1654 Location dst,
1655 Address src,
1656 HInstruction* instr,
1657 XmmRegister temp,
1658 bool is_atomic_load) {
1659 switch (dst_type) {
1660 case DataType::Type::kBool:
1661 case DataType::Type::kUint8:
1662 __ movzxb(dst.AsRegister<Register>(), src);
1663 break;
1664 case DataType::Type::kInt8:
1665 __ movsxb(dst.AsRegister<Register>(), src);
1666 break;
1667 case DataType::Type::kInt16:
1668 __ movsxw(dst.AsRegister<Register>(), src);
1669 break;
1670 case DataType::Type::kUint16:
1671 __ movzxw(dst.AsRegister<Register>(), src);
1672 break;
1673 case DataType::Type::kInt32:
1674 __ movl(dst.AsRegister<Register>(), src);
1675 break;
1676 case DataType::Type::kInt64: {
1677 if (is_atomic_load) {
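        // Atomic load: read all 64 bits with a single SSE load, then split the value into the
        // low/high registers of the destination pair.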
1678 __ movsd(temp, src);
1679 if (instr != nullptr) {
1680 MaybeRecordImplicitNullCheck(instr);
1681 }
1682 __ movd(dst.AsRegisterPairLow<Register>(), temp);
1683 __ psrlq(temp, Immediate(32));
1684 __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1685 } else {
1686 DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1687 Address src_high = Address::displace(src, kX86WordSize);
1688 __ movl(dst.AsRegisterPairLow<Register>(), src);
1689 if (instr != nullptr) {
1690 MaybeRecordImplicitNullCheck(instr);
1691 }
1692 __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1693 }
1694 break;
1695 }
1696 case DataType::Type::kFloat32:
1697 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1698 break;
1699 case DataType::Type::kFloat64:
1700 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1701 break;
1702 case DataType::Type::kReference:
1703 DCHECK(!gUseReadBarrier);
1704 __ movl(dst.AsRegister<Register>(), src);
1705 __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1706 break;
1707 default:
1708 LOG(FATAL) << "Unreachable type " << dst_type;
1709 }
1710 if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1711 // kInt64 needs special handling that is done in the above switch.
1712 MaybeRecordImplicitNullCheck(instr);
1713 }
1714 }
1715
1716 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1717 Location src,
1718 Register dst_base,
1719 Register dst_index,
1720 ScaleFactor dst_scale,
1721 int32_t dst_disp) {
1722 DCHECK(dst_base != Register::kNoRegister);
1723 Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1724
1725 switch (src_type) {
1726 case DataType::Type::kBool:
1727 case DataType::Type::kUint8:
1728 case DataType::Type::kInt8: {
1729 if (src.IsConstant()) {
1730 __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1731 } else {
1732 __ movb(dst, src.AsRegister<ByteRegister>());
1733 }
1734 break;
1735 }
1736 case DataType::Type::kUint16:
1737 case DataType::Type::kInt16: {
1738 if (src.IsConstant()) {
1739 __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1740 } else {
1741 __ movw(dst, src.AsRegister<Register>());
1742 }
1743 break;
1744 }
1745 case DataType::Type::kUint32:
1746 case DataType::Type::kInt32: {
1747 if (src.IsConstant()) {
1748 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1749 __ movl(dst, Immediate(v));
1750 } else {
1751 __ movl(dst, src.AsRegister<Register>());
1752 }
1753 break;
1754 }
1755 case DataType::Type::kUint64:
1756 case DataType::Type::kInt64: {
1757 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1758 if (src.IsConstant()) {
1759 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1760 __ movl(dst, Immediate(Low32Bits(v)));
1761 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1762 } else {
1763 __ movl(dst, src.AsRegisterPairLow<Register>());
1764 __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1765 }
1766 break;
1767 }
1768 case DataType::Type::kFloat32: {
1769 if (src.IsConstant()) {
1770 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1771 __ movl(dst, Immediate(v));
1772 } else {
1773 __ movss(dst, src.AsFpuRegister<XmmRegister>());
1774 }
1775 break;
1776 }
1777 case DataType::Type::kFloat64: {
1778 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1779 if (src.IsConstant()) {
1780 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1781 __ movl(dst, Immediate(Low32Bits(v)));
1782 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1783 } else {
1784 __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1785 }
1786 break;
1787 }
1788 case DataType::Type::kVoid:
1789 case DataType::Type::kReference:
1790 LOG(FATAL) << "Unreachable type " << src_type;
1791 }
1792 }
1793
1794 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1795 DCHECK(location.IsRegister());
1796 __ movl(location.AsRegister<Register>(), Immediate(value));
1797 }
1798
1799 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1800 HParallelMove move(GetGraph()->GetAllocator());
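  // Split 64-bit core moves into two 32-bit moves so the parallel move resolver can handle
  // each half of the register pair independently; constants and FP sources move as one unit.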
1801 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1802 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1803 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1804 } else {
1805 move.AddMove(src, dst, dst_type, nullptr);
1806 }
1807 GetMoveResolver()->EmitNativeCode(&move);
1808 }
1809
1810 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1811 if (location.IsRegister()) {
1812 locations->AddTemp(location);
1813 } else if (location.IsRegisterPair()) {
1814 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1815 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1816 } else {
1817 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1818 }
1819 }
1820
1821 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1822 if (successor->IsExitBlock()) {
1823 DCHECK(got->GetPrevious()->AlwaysThrows());
1824 return; // no code needed
1825 }
1826
1827 HBasicBlock* block = got->GetBlock();
1828 HInstruction* previous = got->GetPrevious();
1829
1830 HLoopInformation* info = block->GetLoopInformation();
1831 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1832 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1833 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1834 return;
1835 }
1836
1837 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1838 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1839 }
1840 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1841 __ jmp(codegen_->GetLabelOf(successor));
1842 }
1843 }
1844
1845 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1846 got->SetLocations(nullptr);
1847 }
1848
1849 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1850 HandleGoto(got, got->GetSuccessor());
1851 }
1852
1853 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1854 try_boundary->SetLocations(nullptr);
1855 }
1856
1857 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1858 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1859 if (!successor->IsExitBlock()) {
1860 HandleGoto(try_boundary, successor);
1861 }
1862 }
1863
1864 void LocationsBuilderX86::VisitExit(HExit* exit) {
1865 exit->SetLocations(nullptr);
1866 }
1867
1868 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1869 }
1870
1871 template<class LabelType>
1872 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1873 LabelType* true_label,
1874 LabelType* false_label) {
1875 if (cond->IsFPConditionTrueIfNaN()) {
1876 __ j(kUnordered, true_label);
1877 } else if (cond->IsFPConditionFalseIfNaN()) {
1878 __ j(kUnordered, false_label);
1879 }
1880 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1881 }
1882
1883 template<class LabelType>
1884 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1885 LabelType* true_label,
1886 LabelType* false_label) {
1887 LocationSummary* locations = cond->GetLocations();
1888 Location left = locations->InAt(0);
1889 Location right = locations->InAt(1);
1890 IfCondition if_cond = cond->GetCondition();
1891
1892 Register left_high = left.AsRegisterPairHigh<Register>();
1893 Register left_low = left.AsRegisterPairLow<Register>();
1894 IfCondition true_high_cond = if_cond;
1895 IfCondition false_high_cond = cond->GetOppositeCondition();
1896 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1897
1898 // Set the conditions for the test, remembering that == needs to be
1899 // decided using the low words.
1900 switch (if_cond) {
1901 case kCondEQ:
1902 case kCondNE:
1903 // Nothing to do.
1904 break;
1905 case kCondLT:
1906 false_high_cond = kCondGT;
1907 break;
1908 case kCondLE:
1909 true_high_cond = kCondLT;
1910 break;
1911 case kCondGT:
1912 false_high_cond = kCondLT;
1913 break;
1914 case kCondGE:
1915 true_high_cond = kCondGT;
1916 break;
1917 case kCondB:
1918 false_high_cond = kCondA;
1919 break;
1920 case kCondBE:
1921 true_high_cond = kCondB;
1922 break;
1923 case kCondA:
1924 false_high_cond = kCondB;
1925 break;
1926 case kCondAE:
1927 true_high_cond = kCondA;
1928 break;
1929 }
1930
1931 if (right.IsConstant()) {
1932 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1933 int32_t val_high = High32Bits(value);
1934 int32_t val_low = Low32Bits(value);
1935
1936 codegen_->Compare32BitValue(left_high, val_high);
1937 if (if_cond == kCondNE) {
1938 __ j(X86Condition(true_high_cond), true_label);
1939 } else if (if_cond == kCondEQ) {
1940 __ j(X86Condition(false_high_cond), false_label);
1941 } else {
1942 __ j(X86Condition(true_high_cond), true_label);
1943 __ j(X86Condition(false_high_cond), false_label);
1944 }
1945 // Must be equal high, so compare the lows.
1946 codegen_->Compare32BitValue(left_low, val_low);
1947 } else if (right.IsRegisterPair()) {
1948 Register right_high = right.AsRegisterPairHigh<Register>();
1949 Register right_low = right.AsRegisterPairLow<Register>();
1950
1951 __ cmpl(left_high, right_high);
1952 if (if_cond == kCondNE) {
1953 __ j(X86Condition(true_high_cond), true_label);
1954 } else if (if_cond == kCondEQ) {
1955 __ j(X86Condition(false_high_cond), false_label);
1956 } else {
1957 __ j(X86Condition(true_high_cond), true_label);
1958 __ j(X86Condition(false_high_cond), false_label);
1959 }
1960 // Must be equal high, so compare the lows.
1961 __ cmpl(left_low, right_low);
1962 } else {
1963 DCHECK(right.IsDoubleStackSlot());
1964 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1965 if (if_cond == kCondNE) {
1966 __ j(X86Condition(true_high_cond), true_label);
1967 } else if (if_cond == kCondEQ) {
1968 __ j(X86Condition(false_high_cond), false_label);
1969 } else {
1970 __ j(X86Condition(true_high_cond), true_label);
1971 __ j(X86Condition(false_high_cond), false_label);
1972 }
1973 // Must be equal high, so compare the lows.
1974 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1975 }
1976 // The last comparison might be unsigned.
1977 __ j(final_condition, true_label);
1978 }
1979
1980 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1981 Location rhs,
1982 HInstruction* insn,
1983 bool is_double) {
1984 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1985 if (is_double) {
1986 if (rhs.IsFpuRegister()) {
1987 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1988 } else if (const_area != nullptr) {
1989 DCHECK(const_area->IsEmittedAtUseSite());
1990 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1991 codegen_->LiteralDoubleAddress(
1992 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1993 const_area->GetBaseMethodAddress(),
1994 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1995 } else {
1996 DCHECK(rhs.IsDoubleStackSlot());
1997 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1998 }
1999 } else {
2000 if (rhs.IsFpuRegister()) {
2001 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2002 } else if (const_area != nullptr) {
2003 DCHECK(const_area->IsEmittedAtUseSite());
2004 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2005 codegen_->LiteralFloatAddress(
2006 const_area->GetConstant()->AsFloatConstant()->GetValue(),
2007 const_area->GetBaseMethodAddress(),
2008 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2009 } else {
2010 DCHECK(rhs.IsStackSlot());
2011 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2012 }
2013 }
2014 }
2015
2016 template<class LabelType>
2017 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2018 LabelType* true_target_in,
2019 LabelType* false_target_in) {
2020   // Generated branching requires both targets to be explicit. If either of the
2021   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2022 LabelType fallthrough_target;
2023 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2024 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2025
2026 LocationSummary* locations = condition->GetLocations();
2027 Location left = locations->InAt(0);
2028 Location right = locations->InAt(1);
2029
2030 DataType::Type type = condition->InputAt(0)->GetType();
2031 switch (type) {
2032 case DataType::Type::kInt64:
2033 GenerateLongComparesAndJumps(condition, true_target, false_target);
2034 break;
2035 case DataType::Type::kFloat32:
2036 GenerateFPCompare(left, right, condition, false);
2037 GenerateFPJumps(condition, true_target, false_target);
2038 break;
2039 case DataType::Type::kFloat64:
2040 GenerateFPCompare(left, right, condition, true);
2041 GenerateFPJumps(condition, true_target, false_target);
2042 break;
2043 default:
2044 LOG(FATAL) << "Unexpected compare type " << type;
2045 }
2046
2047 if (false_target != &fallthrough_target) {
2048 __ jmp(false_target);
2049 }
2050
2051 if (fallthrough_target.IsLinked()) {
2052 __ Bind(&fallthrough_target);
2053 }
2054 }
2055
2056 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
2057   // Moves may affect the eflags register (moving zero uses xorl), so the eflags are only
2058   // reliable if the condition is materialized immediately before `branch`. We also can't
2059   // use the eflags for long/FP conditions, as materializing them requires complex branching.
2060 return cond->IsCondition() &&
2061 cond->GetNext() == branch &&
2062 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2063 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
2064 }
2065
2066 template<class LabelType>
2067 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2068 size_t condition_input_index,
2069 LabelType* true_target,
2070 LabelType* false_target) {
2071 HInstruction* cond = instruction->InputAt(condition_input_index);
2072
2073 if (true_target == nullptr && false_target == nullptr) {
2074 // Nothing to do. The code always falls through.
2075 return;
2076 } else if (cond->IsIntConstant()) {
2077 // Constant condition, statically compared against "true" (integer value 1).
2078 if (cond->AsIntConstant()->IsTrue()) {
2079 if (true_target != nullptr) {
2080 __ jmp(true_target);
2081 }
2082 } else {
2083 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2084 if (false_target != nullptr) {
2085 __ jmp(false_target);
2086 }
2087 }
2088 return;
2089 }
2090
2091 // The following code generates these patterns:
2092 // (1) true_target == nullptr && false_target != nullptr
2093 // - opposite condition true => branch to false_target
2094 // (2) true_target != nullptr && false_target == nullptr
2095 // - condition true => branch to true_target
2096 // (3) true_target != nullptr && false_target != nullptr
2097 // - condition true => branch to true_target
2098 // - branch to false_target
2099 if (IsBooleanValueOrMaterializedCondition(cond)) {
2100 if (AreEflagsSetFrom(cond, instruction)) {
2101 if (true_target == nullptr) {
2102 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2103 } else {
2104 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2105 }
2106 } else {
2107 // Materialized condition, compare against 0.
2108 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2109 if (lhs.IsRegister()) {
2110 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2111 } else {
2112 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2113 }
2114 if (true_target == nullptr) {
2115 __ j(kEqual, false_target);
2116 } else {
2117 __ j(kNotEqual, true_target);
2118 }
2119 }
2120 } else {
2121 // Condition has not been materialized, use its inputs as the comparison and
2122 // its condition as the branch condition.
2123 HCondition* condition = cond->AsCondition();
2124
2125 // If this is a long or FP comparison that has been folded into
2126 // the HCondition, generate the comparison directly.
2127 DataType::Type type = condition->InputAt(0)->GetType();
2128 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2129 GenerateCompareTestAndBranch(condition, true_target, false_target);
2130 return;
2131 }
2132
2133 Location lhs = condition->GetLocations()->InAt(0);
2134 Location rhs = condition->GetLocations()->InAt(1);
2135 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2136 codegen_->GenerateIntCompare(lhs, rhs);
2137 if (true_target == nullptr) {
2138 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2139 } else {
2140 __ j(X86Condition(condition->GetCondition()), true_target);
2141 }
2142 }
2143
2144 // If neither branch falls through (case 3), the conditional branch to `true_target`
2145 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2146 if (true_target != nullptr && false_target != nullptr) {
2147 __ jmp(false_target);
2148 }
2149 }
2150
2151 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2152 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2153 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2154 locations->SetInAt(0, Location::Any());
2155 }
2156 }
2157
2158 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2159 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2160 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2161 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2162 nullptr : codegen_->GetLabelOf(true_successor);
2163 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2164 nullptr : codegen_->GetLabelOf(false_successor);
2165 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2166 }
2167
2168 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2169 LocationSummary* locations = new (GetGraph()->GetAllocator())
2170 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2171 InvokeRuntimeCallingConvention calling_convention;
2172 RegisterSet caller_saves = RegisterSet::Empty();
2173 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2174 locations->SetCustomSlowPathCallerSaves(caller_saves);
2175 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2176 locations->SetInAt(0, Location::Any());
2177 }
2178 }
2179
2180 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2181 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2182 GenerateTestAndBranch<Label>(deoptimize,
2183 /* condition_input_index= */ 0,
2184 slow_path->GetEntryLabel(),
2185 /* false_target= */ nullptr);
2186 }
2187
2188 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2189 LocationSummary* locations = new (GetGraph()->GetAllocator())
2190 LocationSummary(flag, LocationSummary::kNoCall);
2191 locations->SetOut(Location::RequiresRegister());
2192 }
2193
2194 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2195 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2196 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2197 }
2198
2199 static bool SelectCanUseCMOV(HSelect* select) {
2200 // There are no conditional move instructions for XMMs.
2201 if (DataType::IsFloatingPointType(select->GetType())) {
2202 return false;
2203 }
2204
2205 // A FP condition doesn't generate the single CC that we need.
2206 // In 32 bit mode, a long condition doesn't generate a single CC either.
2207 HInstruction* condition = select->GetCondition();
2208 if (condition->IsCondition()) {
2209 DataType::Type compare_type = condition->InputAt(0)->GetType();
2210 if (compare_type == DataType::Type::kInt64 ||
2211 DataType::IsFloatingPointType(compare_type)) {
2212 return false;
2213 }
2214 }
2215
2216 // We can generate a CMOV for this Select.
2217 return true;
2218 }
2219
2220 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2221 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2222 if (DataType::IsFloatingPointType(select->GetType())) {
2223 locations->SetInAt(0, Location::RequiresFpuRegister());
2224 locations->SetInAt(1, Location::Any());
2225 } else {
2226 locations->SetInAt(0, Location::RequiresRegister());
2227 if (SelectCanUseCMOV(select)) {
2228 if (select->InputAt(1)->IsConstant()) {
2229 // Cmov can't handle a constant value.
2230 locations->SetInAt(1, Location::RequiresRegister());
2231 } else {
2232 locations->SetInAt(1, Location::Any());
2233 }
2234 } else {
2235 locations->SetInAt(1, Location::Any());
2236 }
2237 }
2238 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2239 locations->SetInAt(2, Location::RequiresRegister());
2240 }
2241 locations->SetOut(Location::SameAsFirstInput());
2242 }
2243
2244 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2245 LocationSummary* locations = select->GetLocations();
2246 DCHECK(locations->InAt(0).Equals(locations->Out()));
2247 if (SelectCanUseCMOV(select)) {
2248 // If both the condition and the source types are integer, we can generate
2249 // a CMOV to implement Select.
2250
2251 HInstruction* select_condition = select->GetCondition();
2252 Condition cond = kNotEqual;
2253
2254 // Figure out how to test the 'condition'.
2255 if (select_condition->IsCondition()) {
2256 HCondition* condition = select_condition->AsCondition();
2257 if (!condition->IsEmittedAtUseSite()) {
2258 // This was a previously materialized condition.
2259 // Can we use the existing condition code?
2260 if (AreEflagsSetFrom(condition, select)) {
2261 // Materialization was the previous instruction. Condition codes are right.
2262 cond = X86Condition(condition->GetCondition());
2263 } else {
2264 // No, we have to recreate the condition code.
2265 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2266 __ testl(cond_reg, cond_reg);
2267 }
2268 } else {
2269 // We can't handle FP or long here.
2270 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2271 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2272 LocationSummary* cond_locations = condition->GetLocations();
2273 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2274 cond = X86Condition(condition->GetCondition());
2275 }
2276 } else {
2277 // Must be a Boolean condition, which needs to be compared to 0.
2278 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2279 __ testl(cond_reg, cond_reg);
2280 }
2281
2282 // If the condition is true, overwrite the output, which already contains false.
2283 Location false_loc = locations->InAt(0);
2284 Location true_loc = locations->InAt(1);
2285 if (select->GetType() == DataType::Type::kInt64) {
2286 // 64 bit conditional move.
2287 Register false_high = false_loc.AsRegisterPairHigh<Register>();
2288 Register false_low = false_loc.AsRegisterPairLow<Register>();
2289 if (true_loc.IsRegisterPair()) {
2290 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2291 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2292 } else {
2293 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2294 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2295 }
2296 } else {
2297 // 32 bit conditional move.
2298 Register false_reg = false_loc.AsRegister<Register>();
2299 if (true_loc.IsRegister()) {
2300 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2301 } else {
2302 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2303 }
2304 }
2305 } else {
2306 NearLabel false_target;
2307 GenerateTestAndBranch<NearLabel>(
2308 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2309 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2310 __ Bind(&false_target);
2311 }
2312 }
2313
2314 void LocationsBuilderX86::VisitNop(HNop* nop) {
2315 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2316 }
2317
2318 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2319 // The environment recording already happened in CodeGenerator::Compile.
2320 }
2321
2322 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2323 __ subl(ESP, Immediate(adjustment));
2324 __ cfi().AdjustCFAOffset(adjustment);
2325 }
2326
2327 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2328 __ addl(ESP, Immediate(adjustment));
2329 __ cfi().AdjustCFAOffset(-adjustment);
2330 }
2331
2332 void CodeGeneratorX86::GenerateNop() {
2333 __ nop();
2334 }
2335
2336 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2337 LocationSummary* locations =
2338 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2339 // Handle the long/FP comparisons made in instruction simplification.
2340 switch (cond->InputAt(0)->GetType()) {
2341 case DataType::Type::kInt64: {
2342 locations->SetInAt(0, Location::RequiresRegister());
2343 locations->SetInAt(1, Location::Any());
2344 if (!cond->IsEmittedAtUseSite()) {
2345 locations->SetOut(Location::RequiresRegister());
2346 }
2347 break;
2348 }
2349 case DataType::Type::kFloat32:
2350 case DataType::Type::kFloat64: {
2351 locations->SetInAt(0, Location::RequiresFpuRegister());
2352 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2353 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2354 } else if (cond->InputAt(1)->IsConstant()) {
2355 locations->SetInAt(1, Location::RequiresFpuRegister());
2356 } else {
2357 locations->SetInAt(1, Location::Any());
2358 }
2359 if (!cond->IsEmittedAtUseSite()) {
2360 locations->SetOut(Location::RequiresRegister());
2361 }
2362 break;
2363 }
2364 default:
2365 locations->SetInAt(0, Location::RequiresRegister());
2366 locations->SetInAt(1, Location::Any());
2367 if (!cond->IsEmittedAtUseSite()) {
2368 // We need a byte register.
2369 locations->SetOut(Location::RegisterLocation(ECX));
2370 }
2371 break;
2372 }
2373 }
2374
2375 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2376 if (cond->IsEmittedAtUseSite()) {
2377 return;
2378 }
2379
2380 LocationSummary* locations = cond->GetLocations();
2381 Location lhs = locations->InAt(0);
2382 Location rhs = locations->InAt(1);
2383 Register reg = locations->Out().AsRegister<Register>();
2384 NearLabel true_label, false_label;
2385
2386 switch (cond->InputAt(0)->GetType()) {
2387 default: {
2388 // Integer case.
2389
2390 // Clear output register: setb only sets the low byte.
2391 __ xorl(reg, reg);
2392 codegen_->GenerateIntCompare(lhs, rhs);
2393 __ setb(X86Condition(cond->GetCondition()), reg);
2394 return;
2395 }
2396 case DataType::Type::kInt64:
2397 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2398 break;
2399 case DataType::Type::kFloat32:
2400 GenerateFPCompare(lhs, rhs, cond, false);
2401 GenerateFPJumps(cond, &true_label, &false_label);
2402 break;
2403 case DataType::Type::kFloat64:
2404 GenerateFPCompare(lhs, rhs, cond, true);
2405 GenerateFPJumps(cond, &true_label, &false_label);
2406 break;
2407 }
2408
2409 // Convert the jumps into the result.
2410 NearLabel done_label;
2411
2412 // False case: result = 0.
2413 __ Bind(&false_label);
2414 __ xorl(reg, reg);
2415 __ jmp(&done_label);
2416
2417 // True case: result = 1.
2418 __ Bind(&true_label);
2419 __ movl(reg, Immediate(1));
2420 __ Bind(&done_label);
2421 }
2422
2423 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2424 HandleCondition(comp);
2425 }
2426
2427 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2428 HandleCondition(comp);
2429 }
2430
2431 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2432 HandleCondition(comp);
2433 }
2434
2435 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2436 HandleCondition(comp);
2437 }
2438
2439 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2440 HandleCondition(comp);
2441 }
2442
2443 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2444 HandleCondition(comp);
2445 }
2446
2447 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2448 HandleCondition(comp);
2449 }
2450
2451 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2452 HandleCondition(comp);
2453 }
2454
2455 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2456 HandleCondition(comp);
2457 }
2458
2459 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2460 HandleCondition(comp);
2461 }
2462
2463 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2464 HandleCondition(comp);
2465 }
2466
2467 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2468 HandleCondition(comp);
2469 }
2470
2471 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2472 HandleCondition(comp);
2473 }
2474
2475 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2476 HandleCondition(comp);
2477 }
2478
2479 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2480 HandleCondition(comp);
2481 }
2482
2483 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2484 HandleCondition(comp);
2485 }
2486
2487 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2488 HandleCondition(comp);
2489 }
2490
2491 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2492 HandleCondition(comp);
2493 }
2494
2495 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2496 HandleCondition(comp);
2497 }
2498
2499 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2500 HandleCondition(comp);
2501 }
2502
2503 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2504 LocationSummary* locations =
2505 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2506 locations->SetOut(Location::ConstantLocation(constant));
2507 }
2508
2509 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2510 // Will be generated at use site.
2511 }
2512
2513 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2514 LocationSummary* locations =
2515 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2516 locations->SetOut(Location::ConstantLocation(constant));
2517 }
2518
2519 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2520 // Will be generated at use site.
2521 }
2522
2523 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2524 LocationSummary* locations =
2525 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2526 locations->SetOut(Location::ConstantLocation(constant));
2527 }
2528
2529 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2530 // Will be generated at use site.
2531 }
2532
2533 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2534 LocationSummary* locations =
2535 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2536 locations->SetOut(Location::ConstantLocation(constant));
2537 }
2538
2539 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2540 // Will be generated at use site.
2541 }
2542
2543 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2544 LocationSummary* locations =
2545 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2546 locations->SetOut(Location::ConstantLocation(constant));
2547 }
2548
2549 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2550 // Will be generated at use site.
2551 }
2552
2553 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2554 constructor_fence->SetLocations(nullptr);
2555 }
2556
2557 void InstructionCodeGeneratorX86::VisitConstructorFence(
2558 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2559 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2560 }
2561
2562 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2563 memory_barrier->SetLocations(nullptr);
2564 }
2565
2566 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2567 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2568 }
2569
2570 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2571 ret->SetLocations(nullptr);
2572 }
2573
2574 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2575 codegen_->GenerateFrameExit();
2576 }
2577
2578 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2579 LocationSummary* locations =
2580 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2581 SetInForReturnValue(ret, locations);
2582 }
2583
2584 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2585 switch (ret->InputAt(0)->GetType()) {
2586 case DataType::Type::kReference:
2587 case DataType::Type::kBool:
2588 case DataType::Type::kUint8:
2589 case DataType::Type::kInt8:
2590 case DataType::Type::kUint16:
2591 case DataType::Type::kInt16:
2592 case DataType::Type::kInt32:
2593 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2594 break;
2595
2596 case DataType::Type::kInt64:
2597 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2598 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2599 break;
2600
2601 case DataType::Type::kFloat32:
2602 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2603 if (GetGraph()->IsCompilingOsr()) {
2604 // To simplify callers of an OSR method, we put the return value in both
2605 // floating point and core registers.
2606 __ movd(EAX, XMM0);
2607 }
2608 break;
2609
2610 case DataType::Type::kFloat64:
2611 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2612 if (GetGraph()->IsCompilingOsr()) {
2613 // To simplify callers of an OSR method, we put the return value in both
2614 // floating point and core registers.
2615 __ movd(EAX, XMM0);
2616 // Use XMM1 as temporary register to not clobber XMM0.
2617 __ movaps(XMM1, XMM0);
2618 __ psrlq(XMM1, Immediate(32));
2619 __ movd(EDX, XMM1);
2620 }
2621 break;
2622
2623 default:
2624 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2625 }
2626 codegen_->GenerateFrameExit();
2627 }
2628
2629 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2630 // The trampoline uses the same calling convention as dex calling conventions,
2631 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2632 // the method_idx.
2633 HandleInvoke(invoke);
2634 }
2635
2636 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2637 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2638 }
2639
2640 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2641 // Explicit clinit checks triggered by static invokes must have been pruned by
2642 // art::PrepareForRegisterAllocation.
2643 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2644
2645 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2646 if (intrinsic.TryDispatch(invoke)) {
2647 if (invoke->GetLocations()->CanCall() &&
2648 invoke->HasPcRelativeMethodLoadKind() &&
2649 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2650 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2651 }
2652 return;
2653 }
2654
2655 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2656 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2657 /*for_register_allocation=*/ true);
2658 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2659 } else {
2660 HandleInvoke(invoke);
2661 }
2662
2663 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2664 if (invoke->HasPcRelativeMethodLoadKind()) {
2665 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2666 }
2667 }
2668
2669 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2670 if (invoke->GetLocations()->Intrinsified()) {
2671 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2672 intrinsic.Dispatch(invoke);
2673 return true;
2674 }
2675 return false;
2676 }
2677
2678 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2679 // Explicit clinit checks triggered by static invokes must have been pruned by
2680 // art::PrepareForRegisterAllocation.
2681 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2682
2683 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2684 return;
2685 }
2686
2687 LocationSummary* locations = invoke->GetLocations();
2688 codegen_->GenerateStaticOrDirectCall(
2689 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2690 }
2691
2692 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2693 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2694 if (intrinsic.TryDispatch(invoke)) {
2695 return;
2696 }
2697
2698 HandleInvoke(invoke);
2699
2700 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2701 // Add one temporary for inline cache update.
2702 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2703 }
2704 }
2705
2706 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2707 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2708 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2709 }
2710
2711 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2712 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2713 return;
2714 }
2715
2716 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2717 DCHECK(!codegen_->IsLeafMethod());
2718 }
2719
2720 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2721 // This call to HandleInvoke allocates a temporary (core) register
2722 // which is also used to transfer the hidden argument from a core
2723 // register into the FP register (XMM7) that carries it.
2724 HandleInvoke(invoke);
2725 // Add the hidden argument.
2726 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2727
2728 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2729 // Add one temporary for inline cache update.
2730 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2731 }
2732
2733 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2734 if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2735 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2736 }
2737
2738 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2739 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2740 Location::RequiresRegister());
2741 }
2742 }
2743
2744 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2745 DCHECK_EQ(EAX, klass);
2746 // We know the destination of an intrinsic, so no need to record inline
2747 // caches (also the intrinsic location builder doesn't request an additional
2748 // temporary).
2749 if (!instruction->GetLocations()->Intrinsified() &&
2750 GetGraph()->IsCompilingBaseline() &&
2751 !Runtime::Current()->IsAotCompiler()) {
2752 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2753 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2754 DCHECK(info != nullptr);
2755 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2756 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2757 if (kIsDebugBuild) {
2758 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2759 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2760 }
2761 Register temp = EBP;
2762 NearLabel done;
2763 __ movl(temp, Immediate(address));
2764 // Fast path for a monomorphic cache.
2765 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2766 __ j(kEqual, &done);
2767 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2768 __ Bind(&done);
2769 }
2770 }
2771
2772 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2773 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2774 LocationSummary* locations = invoke->GetLocations();
2775 Register temp = locations->GetTemp(0).AsRegister<Register>();
2776 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2777 Location receiver = locations->InAt(0);
2778 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2779
2780 // Set the hidden argument. It is safe to do this here, as XMM7
2781 // is not modified thereafter, before the `call` instruction.
2782 DCHECK_EQ(XMM7, hidden_reg);
2783 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2784 __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2785 } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2786 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2787 __ movd(hidden_reg, temp);
2788 }
2789
2790 if (receiver.IsStackSlot()) {
2791 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2792 // /* HeapReference<Class> */ temp = temp->klass_
2793 __ movl(temp, Address(temp, class_offset));
2794 } else {
2795 // /* HeapReference<Class> */ temp = receiver->klass_
2796 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2797 }
2798 codegen_->MaybeRecordImplicitNullCheck(invoke);
2799 // Instead of simply (possibly) unpoisoning `temp` here, we should
2800 // emit a read barrier for the previous class reference load.
2801 // However, this is not required in practice, as this is an
2802 // intermediate/temporary reference and because the current
2803 // concurrent copying collector keeps the from-space memory
2804 // intact/accessible until the end of the marking phase (a
2805 // future collector may not).
2806 __ MaybeUnpoisonHeapReference(temp);
2807
2808 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2809
2810 // temp = temp->GetAddressOfIMT()
2811 __ movl(temp,
2812 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2813 // temp = temp->GetImtEntryAt(method_offset);
2814 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2815 invoke->GetImtIndex(), kX86PointerSize));
2816 __ movl(temp, Address(temp, method_offset));
2817 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2818 // We pass the method from the IMT in case of a conflict. This will ensure
2819 // we go into the runtime to resolve the actual method.
2820 __ movd(hidden_reg, temp);
2821 }
2822 // call temp->GetEntryPoint();
2823 __ call(Address(temp,
2824 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2825
2826 DCHECK(!codegen_->IsLeafMethod());
2827 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2828 }
2829
2830 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2831 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2832 if (intrinsic.TryDispatch(invoke)) {
2833 return;
2834 }
2835 HandleInvoke(invoke);
2836 }
2837
2838 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2839 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2840 return;
2841 }
2842 codegen_->GenerateInvokePolymorphicCall(invoke);
2843 }
2844
2845 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2846 HandleInvoke(invoke);
2847 }
2848
2849 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2850 codegen_->GenerateInvokeCustomCall(invoke);
2851 }
2852
2853 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2854 LocationSummary* locations =
2855 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2856 switch (neg->GetResultType()) {
2857 case DataType::Type::kInt32:
2858 case DataType::Type::kInt64:
2859 locations->SetInAt(0, Location::RequiresRegister());
2860 locations->SetOut(Location::SameAsFirstInput());
2861 break;
2862
2863 case DataType::Type::kFloat32:
2864 locations->SetInAt(0, Location::RequiresFpuRegister());
2865 locations->SetOut(Location::SameAsFirstInput());
2866 locations->AddTemp(Location::RequiresRegister());
2867 locations->AddTemp(Location::RequiresFpuRegister());
2868 break;
2869
2870 case DataType::Type::kFloat64:
2871 locations->SetInAt(0, Location::RequiresFpuRegister());
2872 locations->SetOut(Location::SameAsFirstInput());
2873 locations->AddTemp(Location::RequiresFpuRegister());
2874 break;
2875
2876 default:
2877 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2878 }
2879 }
2880
2881 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2882 LocationSummary* locations = neg->GetLocations();
2883 Location out = locations->Out();
2884 Location in = locations->InAt(0);
2885 switch (neg->GetResultType()) {
2886 case DataType::Type::kInt32:
2887 DCHECK(in.IsRegister());
2888 DCHECK(in.Equals(out));
2889 __ negl(out.AsRegister<Register>());
2890 break;
2891
2892 case DataType::Type::kInt64:
2893 DCHECK(in.IsRegisterPair());
2894 DCHECK(in.Equals(out));
2895 __ negl(out.AsRegisterPairLow<Register>());
2896 // Negation is similar to subtraction from zero. The low 32-bit
2897 // word triggers a borrow when it is different from zero; to take
2898 // this into account, add 1 to the high 32-bit word if the carry
2899 // flag (CF) is set to 1 after the first NEGL operation, before
2900 // negating the high word.
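// Editorial worked example (illustrative only, not part of the generated code):
// negating the 64-bit value 1 (high = 0x00000000, low = 0x00000001):
//   negl low      -> low  = 0xFFFFFFFF, CF = 1 (operand was non-zero)
//   adcl high, 0  -> high = 0x00000000 + 0 + CF = 0x00000001
//   negl high     -> high = 0xFFFFFFFF
// giving 0xFFFFFFFF'FFFFFFFF, i.e. -1, as expected.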
2901 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2902 __ negl(out.AsRegisterPairHigh<Register>());
2903 break;
2904
2905 case DataType::Type::kFloat32: {
2906 DCHECK(in.Equals(out));
2907 Register constant = locations->GetTemp(0).AsRegister<Register>();
2908 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2909 // Implement float negation with an exclusive or with value
2910 // 0x80000000 (mask for bit 31, representing the sign of a
2911 // single-precision floating-point number).
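// For example, 1.0f has bit pattern 0x3F800000; XORing it with the mask
// gives 0xBF800000, which is -1.0f. Only the sign bit changes, so exponent
// and mantissa bits (including NaN payloads and +/-0.0f) are preserved.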
2912 __ movl(constant, Immediate(INT32_C(0x80000000)));
2913 __ movd(mask, constant);
2914 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2915 break;
2916 }
2917
2918 case DataType::Type::kFloat64: {
2919 DCHECK(in.Equals(out));
2920 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2921 // Implement double negation with an exclusive or with value
2922 // 0x8000000000000000 (mask for bit 63, representing the sign of
2923 // a double-precision floating-point number).
2924 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2925 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2926 break;
2927 }
2928
2929 default:
2930 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2931 }
2932 }
2933
2934 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2935 LocationSummary* locations =
2936 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2937 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2938 locations->SetInAt(0, Location::RequiresFpuRegister());
2939 locations->SetInAt(1, Location::RequiresRegister());
2940 locations->SetOut(Location::SameAsFirstInput());
2941 locations->AddTemp(Location::RequiresFpuRegister());
2942 }
2943
2944 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2945 LocationSummary* locations = neg->GetLocations();
2946 Location out = locations->Out();
2947 DCHECK(locations->InAt(0).Equals(out));
2948
2949 Register constant_area = locations->InAt(1).AsRegister<Register>();
2950 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2951 if (neg->GetType() == DataType::Type::kFloat32) {
2952 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2953 neg->GetBaseMethodAddress(),
2954 constant_area));
2955 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2956 } else {
2957 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2958 neg->GetBaseMethodAddress(),
2959 constant_area));
2960 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2961 }
2962 }
2963
2964 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2965 DataType::Type result_type = conversion->GetResultType();
2966 DataType::Type input_type = conversion->GetInputType();
2967 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2968 << input_type << " -> " << result_type;
2969
2970 // The float-to-long and double-to-long type conversions rely on a
2971 // call to the runtime.
2972 LocationSummary::CallKind call_kind =
2973 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2974 && result_type == DataType::Type::kInt64)
2975 ? LocationSummary::kCallOnMainOnly
2976 : LocationSummary::kNoCall;
2977 LocationSummary* locations =
2978 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2979
2980 switch (result_type) {
2981 case DataType::Type::kUint8:
2982 case DataType::Type::kInt8:
2983 switch (input_type) {
2984 case DataType::Type::kUint8:
2985 case DataType::Type::kInt8:
2986 case DataType::Type::kUint16:
2987 case DataType::Type::kInt16:
2988 case DataType::Type::kInt32:
2989 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2990 // Make the output overlap to please the register allocator. This greatly simplifies
2991 // the validation of the linear scan implementation.
2992 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2993 break;
2994 case DataType::Type::kInt64: {
2995 HInstruction* input = conversion->InputAt(0);
2996 Location input_location = input->IsConstant()
2997 ? Location::ConstantLocation(input)
2998 : Location::RegisterPairLocation(EAX, EDX);
2999 locations->SetInAt(0, input_location);
3000 // Make the output overlap to please the register allocator. This greatly simplifies
3001 // the validation of the linear scan implementation.
3002 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3003 break;
3004 }
3005
3006 default:
3007 LOG(FATAL) << "Unexpected type conversion from " << input_type
3008 << " to " << result_type;
3009 }
3010 break;
3011
3012 case DataType::Type::kUint16:
3013 case DataType::Type::kInt16:
3014 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3015 locations->SetInAt(0, Location::Any());
3016 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3017 break;
3018
3019 case DataType::Type::kInt32:
3020 switch (input_type) {
3021 case DataType::Type::kInt64:
3022 locations->SetInAt(0, Location::Any());
3023 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3024 break;
3025
3026 case DataType::Type::kFloat32:
3027 locations->SetInAt(0, Location::RequiresFpuRegister());
3028 locations->SetOut(Location::RequiresRegister());
3029 locations->AddTemp(Location::RequiresFpuRegister());
3030 break;
3031
3032 case DataType::Type::kFloat64:
3033 locations->SetInAt(0, Location::RequiresFpuRegister());
3034 locations->SetOut(Location::RequiresRegister());
3035 locations->AddTemp(Location::RequiresFpuRegister());
3036 break;
3037
3038 default:
3039 LOG(FATAL) << "Unexpected type conversion from " << input_type
3040 << " to " << result_type;
3041 }
3042 break;
3043
3044 case DataType::Type::kInt64:
3045 switch (input_type) {
3046 case DataType::Type::kBool:
3047 case DataType::Type::kUint8:
3048 case DataType::Type::kInt8:
3049 case DataType::Type::kUint16:
3050 case DataType::Type::kInt16:
3051 case DataType::Type::kInt32:
3052 locations->SetInAt(0, Location::RegisterLocation(EAX));
3053 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3054 break;
3055
3056 case DataType::Type::kFloat32:
3057 case DataType::Type::kFloat64: {
3058 InvokeRuntimeCallingConvention calling_convention;
3059 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3060 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3061
3062 // The runtime helper puts the result in EAX, EDX.
3063 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3064 }
3065 break;
3066
3067 default:
3068 LOG(FATAL) << "Unexpected type conversion from " << input_type
3069 << " to " << result_type;
3070 }
3071 break;
3072
3073 case DataType::Type::kFloat32:
3074 switch (input_type) {
3075 case DataType::Type::kBool:
3076 case DataType::Type::kUint8:
3077 case DataType::Type::kInt8:
3078 case DataType::Type::kUint16:
3079 case DataType::Type::kInt16:
3080 case DataType::Type::kInt32:
3081 locations->SetInAt(0, Location::RequiresRegister());
3082 locations->SetOut(Location::RequiresFpuRegister());
3083 break;
3084
3085 case DataType::Type::kInt64:
3086 locations->SetInAt(0, Location::Any());
3087 locations->SetOut(Location::Any());
3088 break;
3089
3090 case DataType::Type::kFloat64:
3091 locations->SetInAt(0, Location::RequiresFpuRegister());
3092 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3093 break;
3094
3095 default:
3096 LOG(FATAL) << "Unexpected type conversion from " << input_type
3097 << " to " << result_type;
3098 }
3099 break;
3100
3101 case DataType::Type::kFloat64:
3102 switch (input_type) {
3103 case DataType::Type::kBool:
3104 case DataType::Type::kUint8:
3105 case DataType::Type::kInt8:
3106 case DataType::Type::kUint16:
3107 case DataType::Type::kInt16:
3108 case DataType::Type::kInt32:
3109 locations->SetInAt(0, Location::RequiresRegister());
3110 locations->SetOut(Location::RequiresFpuRegister());
3111 break;
3112
3113 case DataType::Type::kInt64:
3114 locations->SetInAt(0, Location::Any());
3115 locations->SetOut(Location::Any());
3116 break;
3117
3118 case DataType::Type::kFloat32:
3119 locations->SetInAt(0, Location::RequiresFpuRegister());
3120 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3121 break;
3122
3123 default:
3124 LOG(FATAL) << "Unexpected type conversion from " << input_type
3125 << " to " << result_type;
3126 }
3127 break;
3128
3129 default:
3130 LOG(FATAL) << "Unexpected type conversion from " << input_type
3131 << " to " << result_type;
3132 }
3133 }
3134
3135 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3136 LocationSummary* locations = conversion->GetLocations();
3137 Location out = locations->Out();
3138 Location in = locations->InAt(0);
3139 DataType::Type result_type = conversion->GetResultType();
3140 DataType::Type input_type = conversion->GetInputType();
3141 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3142 << input_type << " -> " << result_type;
3143 switch (result_type) {
3144 case DataType::Type::kUint8:
3145 switch (input_type) {
3146 case DataType::Type::kInt8:
3147 case DataType::Type::kUint16:
3148 case DataType::Type::kInt16:
3149 case DataType::Type::kInt32:
3150 if (in.IsRegister()) {
3151 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3152 } else {
3153 DCHECK(in.GetConstant()->IsIntConstant());
3154 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3155 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3156 }
3157 break;
3158 case DataType::Type::kInt64:
3159 if (in.IsRegisterPair()) {
3160 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3161 } else {
3162 DCHECK(in.GetConstant()->IsLongConstant());
3163 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3164 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3165 }
3166 break;
3167
3168 default:
3169 LOG(FATAL) << "Unexpected type conversion from " << input_type
3170 << " to " << result_type;
3171 }
3172 break;
3173
3174 case DataType::Type::kInt8:
3175 switch (input_type) {
3176 case DataType::Type::kUint8:
3177 case DataType::Type::kUint16:
3178 case DataType::Type::kInt16:
3179 case DataType::Type::kInt32:
3180 if (in.IsRegister()) {
3181 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3182 } else {
3183 DCHECK(in.GetConstant()->IsIntConstant());
3184 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3185 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3186 }
3187 break;
3188 case DataType::Type::kInt64:
3189 if (in.IsRegisterPair()) {
3190 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3191 } else {
3192 DCHECK(in.GetConstant()->IsLongConstant());
3193 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3194 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3195 }
3196 break;
3197
3198 default:
3199 LOG(FATAL) << "Unexpected type conversion from " << input_type
3200 << " to " << result_type;
3201 }
3202 break;
3203
3204 case DataType::Type::kUint16:
3205 switch (input_type) {
3206 case DataType::Type::kInt8:
3207 case DataType::Type::kInt16:
3208 case DataType::Type::kInt32:
3209 if (in.IsRegister()) {
3210 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3211 } else if (in.IsStackSlot()) {
3212 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3213 } else {
3214 DCHECK(in.GetConstant()->IsIntConstant());
3215 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3216 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3217 }
3218 break;
3219 case DataType::Type::kInt64:
3220 if (in.IsRegisterPair()) {
3221 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3222 } else if (in.IsDoubleStackSlot()) {
3223 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3224 } else {
3225 DCHECK(in.GetConstant()->IsLongConstant());
3226 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3227 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3228 }
3229 break;
3230
3231 default:
3232 LOG(FATAL) << "Unexpected type conversion from " << input_type
3233 << " to " << result_type;
3234 }
3235 break;
3236
3237 case DataType::Type::kInt16:
3238 switch (input_type) {
3239 case DataType::Type::kUint16:
3240 case DataType::Type::kInt32:
3241 if (in.IsRegister()) {
3242 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3243 } else if (in.IsStackSlot()) {
3244 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3245 } else {
3246 DCHECK(in.GetConstant()->IsIntConstant());
3247 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3248 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3249 }
3250 break;
3251 case DataType::Type::kInt64:
3252 if (in.IsRegisterPair()) {
3253 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3254 } else if (in.IsDoubleStackSlot()) {
3255 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3256 } else {
3257 DCHECK(in.GetConstant()->IsLongConstant());
3258 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3259 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3260 }
3261 break;
3262
3263 default:
3264 LOG(FATAL) << "Unexpected type conversion from " << input_type
3265 << " to " << result_type;
3266 }
3267 break;
3268
3269 case DataType::Type::kInt32:
3270 switch (input_type) {
3271 case DataType::Type::kInt64:
3272 if (in.IsRegisterPair()) {
3273 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3274 } else if (in.IsDoubleStackSlot()) {
3275 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3276 } else {
3277 DCHECK(in.IsConstant());
3278 DCHECK(in.GetConstant()->IsLongConstant());
3279 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3280 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3281 }
3282 break;
3283
3284 case DataType::Type::kFloat32: {
3285 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3286 Register output = out.AsRegister<Register>();
3287 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3288 NearLabel done, nan;
3289
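// Editorial note on the sequence below: it implements the Java float-to-int
// contract: NaN converts to 0, values at or above the float representation of
// kPrimIntMax saturate to kPrimIntMax, and everything else is truncated toward
// zero. Negative overflow needs no explicit handling because CVTTSS2SI returns
// the "integer indefinite" value 0x80000000 (== INT32_MIN) for out-of-range
// inputs, which is already the required saturated result.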
3290 __ movl(output, Immediate(kPrimIntMax));
3291 // temp = int-to-float(output)
3292 __ cvtsi2ss(temp, output);
3293 // if input >= temp goto done
3294 __ comiss(input, temp);
3295 __ j(kAboveEqual, &done);
3296 // if input == NaN goto nan
3297 __ j(kUnordered, &nan);
3298 // output = float-to-int-truncate(input)
3299 __ cvttss2si(output, input);
3300 __ jmp(&done);
3301 __ Bind(&nan);
3302 // output = 0
3303 __ xorl(output, output);
3304 __ Bind(&done);
3305 break;
3306 }
3307
3308 case DataType::Type::kFloat64: {
3309 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3310 Register output = out.AsRegister<Register>();
3311 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3312 NearLabel done, nan;
3313
3314 __ movl(output, Immediate(kPrimIntMax));
3315 // temp = int-to-double(output)
3316 __ cvtsi2sd(temp, output);
3317 // if input >= temp goto done
3318 __ comisd(input, temp);
3319 __ j(kAboveEqual, &done);
3320 // if input == NaN goto nan
3321 __ j(kUnordered, &nan);
3322 // output = double-to-int-truncate(input)
3323 __ cvttsd2si(output, input);
3324 __ jmp(&done);
3325 __ Bind(&nan);
3326 // output = 0
3327 __ xorl(output, output);
3328 __ Bind(&done);
3329 break;
3330 }
3331
3332 default:
3333 LOG(FATAL) << "Unexpected type conversion from " << input_type
3334 << " to " << result_type;
3335 }
3336 break;
3337
3338 case DataType::Type::kInt64:
3339 switch (input_type) {
3340 case DataType::Type::kBool:
3341 case DataType::Type::kUint8:
3342 case DataType::Type::kInt8:
3343 case DataType::Type::kUint16:
3344 case DataType::Type::kInt16:
3345 case DataType::Type::kInt32:
3346 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3347 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3348 DCHECK_EQ(in.AsRegister<Register>(), EAX);
3349 __ cdq();
3350 break;
3351
3352 case DataType::Type::kFloat32:
3353 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3354 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3355 break;
3356
3357 case DataType::Type::kFloat64:
3358 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3359 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3360 break;
3361
3362 default:
3363 LOG(FATAL) << "Unexpected type conversion from " << input_type
3364 << " to " << result_type;
3365 }
3366 break;
3367
3368 case DataType::Type::kFloat32:
3369 switch (input_type) {
3370 case DataType::Type::kBool:
3371 case DataType::Type::kUint8:
3372 case DataType::Type::kInt8:
3373 case DataType::Type::kUint16:
3374 case DataType::Type::kInt16:
3375 case DataType::Type::kInt32:
3376 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3377 break;
3378
3379 case DataType::Type::kInt64: {
3380 size_t adjustment = 0;
3381
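// Editorial note: 32-bit x86 has no SSE instruction that converts a 64-bit
// integer directly to a floating-point value (CVTSI2SS with a 64-bit operand
// requires REX.W, which only exists in 64-bit mode), so the value is routed
// through the x87 FPU: FILDL loads the 64-bit integer onto the FP stack and
// FSTPS stores it back as a single-precision float.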
3382 // Create stack space for the call to
3383 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3384 // TODO: enhance register allocator to ask for stack temporaries.
3385 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3386 adjustment = DataType::Size(DataType::Type::kInt64);
3387 codegen_->IncreaseFrame(adjustment);
3388 }
3389
3390 // Load the value to the FP stack, using temporaries if needed.
3391 PushOntoFPStack(in, 0, adjustment, false, true);
3392
3393 if (out.IsStackSlot()) {
3394 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3395 } else {
3396 __ fstps(Address(ESP, 0));
3397 Location stack_temp = Location::StackSlot(0);
3398 codegen_->Move32(out, stack_temp);
3399 }
3400
3401 // Remove the temporary stack space we allocated.
3402 if (adjustment != 0) {
3403 codegen_->DecreaseFrame(adjustment);
3404 }
3405 break;
3406 }
3407
3408 case DataType::Type::kFloat64:
3409 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3410 break;
3411
3412 default:
3413 LOG(FATAL) << "Unexpected type conversion from " << input_type
3414 << " to " << result_type;
3415 }
3416 break;
3417
3418 case DataType::Type::kFloat64:
3419 switch (input_type) {
3420 case DataType::Type::kBool:
3421 case DataType::Type::kUint8:
3422 case DataType::Type::kInt8:
3423 case DataType::Type::kUint16:
3424 case DataType::Type::kInt16:
3425 case DataType::Type::kInt32:
3426 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3427 break;
3428
3429 case DataType::Type::kInt64: {
3430 size_t adjustment = 0;
3431
3432 // Create stack space for the call to
3433 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3434 // TODO: enhance register allocator to ask for stack temporaries.
3435 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3436 adjustment = DataType::Size(DataType::Type::kInt64);
3437 codegen_->IncreaseFrame(adjustment);
3438 }
3439
3440 // Load the value to the FP stack, using temporaries if needed.
3441 PushOntoFPStack(in, 0, adjustment, false, true);
3442
3443 if (out.IsDoubleStackSlot()) {
3444 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3445 } else {
3446 __ fstpl(Address(ESP, 0));
3447 Location stack_temp = Location::DoubleStackSlot(0);
3448 codegen_->Move64(out, stack_temp);
3449 }
3450
3451 // Remove the temporary stack space we allocated.
3452 if (adjustment != 0) {
3453 codegen_->DecreaseFrame(adjustment);
3454 }
3455 break;
3456 }
3457
3458 case DataType::Type::kFloat32:
3459 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3460 break;
3461
3462 default:
3463 LOG(FATAL) << "Unexpected type conversion from " << input_type
3464 << " to " << result_type;
3465 }
3466 break;
3467
3468 default:
3469 LOG(FATAL) << "Unexpected type conversion from " << input_type
3470 << " to " << result_type;
3471 }
3472 }
3473
3474 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3475 LocationSummary* locations =
3476 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3477 switch (add->GetResultType()) {
3478 case DataType::Type::kInt32: {
3479 locations->SetInAt(0, Location::RequiresRegister());
3480 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3481 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3482 break;
3483 }
3484
3485 case DataType::Type::kInt64: {
3486 locations->SetInAt(0, Location::RequiresRegister());
3487 locations->SetInAt(1, Location::Any());
3488 locations->SetOut(Location::SameAsFirstInput());
3489 break;
3490 }
3491
3492 case DataType::Type::kFloat32:
3493 case DataType::Type::kFloat64: {
3494 locations->SetInAt(0, Location::RequiresFpuRegister());
3495 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3496 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3497 } else if (add->InputAt(1)->IsConstant()) {
3498 locations->SetInAt(1, Location::RequiresFpuRegister());
3499 } else {
3500 locations->SetInAt(1, Location::Any());
3501 }
3502 locations->SetOut(Location::SameAsFirstInput());
3503 break;
3504 }
3505
3506 default:
3507 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3508 UNREACHABLE();
3509 }
3510 }
3511
3512 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3513 LocationSummary* locations = add->GetLocations();
3514 Location first = locations->InAt(0);
3515 Location second = locations->InAt(1);
3516 Location out = locations->Out();
3517
3518 switch (add->GetResultType()) {
3519 case DataType::Type::kInt32: {
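// Editorial note: when the output register differs from both inputs, LEAL is
// used as a non-destructive three-operand add: leal out, [first + second * 1]
// computes the sum without clobbering either input and without touching the
// flags.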
3520 if (second.IsRegister()) {
3521 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3522 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3523 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3524 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3525 } else {
3526 __ leal(out.AsRegister<Register>(), Address(
3527 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3528 }
3529 } else if (second.IsConstant()) {
3530 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3531 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3532 __ addl(out.AsRegister<Register>(), Immediate(value));
3533 } else {
3534 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3535 }
3536 } else {
3537 DCHECK(first.Equals(locations->Out()));
3538 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3539 }
3540 break;
3541 }
3542
3543 case DataType::Type::kInt64: {
3544 if (second.IsRegisterPair()) {
3545 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3546 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3547 } else if (second.IsDoubleStackSlot()) {
3548 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3549 __ adcl(first.AsRegisterPairHigh<Register>(),
3550 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3551 } else {
3552 DCHECK(second.IsConstant()) << second;
3553 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3554 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3555 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3556 }
3557 break;
3558 }
3559
3560 case DataType::Type::kFloat32: {
3561 if (second.IsFpuRegister()) {
3562 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3563 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3564 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3565 DCHECK(const_area->IsEmittedAtUseSite());
3566 __ addss(first.AsFpuRegister<XmmRegister>(),
3567 codegen_->LiteralFloatAddress(
3568 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3569 const_area->GetBaseMethodAddress(),
3570 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3571 } else {
3572 DCHECK(second.IsStackSlot());
3573 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3574 }
3575 break;
3576 }
3577
3578 case DataType::Type::kFloat64: {
3579 if (second.IsFpuRegister()) {
3580 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3581 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3582 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3583 DCHECK(const_area->IsEmittedAtUseSite());
3584 __ addsd(first.AsFpuRegister<XmmRegister>(),
3585 codegen_->LiteralDoubleAddress(
3586 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3587 const_area->GetBaseMethodAddress(),
3588 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3589 } else {
3590 DCHECK(second.IsDoubleStackSlot());
3591 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3592 }
3593 break;
3594 }
3595
3596 default:
3597 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3598 }
3599 }
3600
3601 void LocationsBuilderX86::VisitSub(HSub* sub) {
3602 LocationSummary* locations =
3603 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3604 switch (sub->GetResultType()) {
3605 case DataType::Type::kInt32:
3606 case DataType::Type::kInt64: {
3607 locations->SetInAt(0, Location::RequiresRegister());
3608 locations->SetInAt(1, Location::Any());
3609 locations->SetOut(Location::SameAsFirstInput());
3610 break;
3611 }
3612 case DataType::Type::kFloat32:
3613 case DataType::Type::kFloat64: {
3614 locations->SetInAt(0, Location::RequiresFpuRegister());
3615 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3616 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3617 } else if (sub->InputAt(1)->IsConstant()) {
3618 locations->SetInAt(1, Location::RequiresFpuRegister());
3619 } else {
3620 locations->SetInAt(1, Location::Any());
3621 }
3622 locations->SetOut(Location::SameAsFirstInput());
3623 break;
3624 }
3625
3626 default:
3627 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3628 }
3629 }
3630
3631 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3632 LocationSummary* locations = sub->GetLocations();
3633 Location first = locations->InAt(0);
3634 Location second = locations->InAt(1);
3635 DCHECK(first.Equals(locations->Out()));
3636 switch (sub->GetResultType()) {
3637 case DataType::Type::kInt32: {
3638 if (second.IsRegister()) {
3639 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3640 } else if (second.IsConstant()) {
3641 __ subl(first.AsRegister<Register>(),
3642 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3643 } else {
3644 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3645 }
3646 break;
3647 }
3648
3649 case DataType::Type::kInt64: {
3650 if (second.IsRegisterPair()) {
3651 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3652 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3653 } else if (second.IsDoubleStackSlot()) {
3654 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3655 __ sbbl(first.AsRegisterPairHigh<Register>(),
3656 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3657 } else {
3658 DCHECK(second.IsConstant()) << second;
3659 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3660 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3661 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3662 }
3663 break;
3664 }
3665
3666 case DataType::Type::kFloat32: {
3667 if (second.IsFpuRegister()) {
3668 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3669 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3670 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3671 DCHECK(const_area->IsEmittedAtUseSite());
3672 __ subss(first.AsFpuRegister<XmmRegister>(),
3673 codegen_->LiteralFloatAddress(
3674 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3675 const_area->GetBaseMethodAddress(),
3676 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3677 } else {
3678 DCHECK(second.IsStackSlot());
3679 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3680 }
3681 break;
3682 }
3683
3684 case DataType::Type::kFloat64: {
3685 if (second.IsFpuRegister()) {
3686 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3687 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3688 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3689 DCHECK(const_area->IsEmittedAtUseSite());
3690 __ subsd(first.AsFpuRegister<XmmRegister>(),
3691 codegen_->LiteralDoubleAddress(
3692 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3693 const_area->GetBaseMethodAddress(),
3694 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3695 } else {
3696 DCHECK(second.IsDoubleStackSlot());
3697 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3698 }
3699 break;
3700 }
3701
3702 default:
3703 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3704 }
3705 }
3706
3707 void LocationsBuilderX86::VisitMul(HMul* mul) {
3708 LocationSummary* locations =
3709 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3710 switch (mul->GetResultType()) {
3711 case DataType::Type::kInt32:
3712 locations->SetInAt(0, Location::RequiresRegister());
3713 locations->SetInAt(1, Location::Any());
3714 if (mul->InputAt(1)->IsIntConstant()) {
3715 // Can use 3 operand multiply.
3716 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3717 } else {
3718 locations->SetOut(Location::SameAsFirstInput());
3719 }
3720 break;
3721 case DataType::Type::kInt64: {
3722 locations->SetInAt(0, Location::RequiresRegister());
3723 locations->SetInAt(1, Location::Any());
3724 locations->SetOut(Location::SameAsFirstInput());
3725 // Needed for imul on 32bits with 64bits output.
3726 locations->AddTemp(Location::RegisterLocation(EAX));
3727 locations->AddTemp(Location::RegisterLocation(EDX));
3728 break;
3729 }
3730 case DataType::Type::kFloat32:
3731 case DataType::Type::kFloat64: {
3732 locations->SetInAt(0, Location::RequiresFpuRegister());
3733 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3734 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3735 } else if (mul->InputAt(1)->IsConstant()) {
3736 locations->SetInAt(1, Location::RequiresFpuRegister());
3737 } else {
3738 locations->SetInAt(1, Location::Any());
3739 }
3740 locations->SetOut(Location::SameAsFirstInput());
3741 break;
3742 }
3743
3744 default:
3745 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3746 }
3747 }
3748
3749 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3750 LocationSummary* locations = mul->GetLocations();
3751 Location first = locations->InAt(0);
3752 Location second = locations->InAt(1);
3753 Location out = locations->Out();
3754
3755 switch (mul->GetResultType()) {
3756 case DataType::Type::kInt32:
3757 // The constant may have ended up in a register, so test explicitly to avoid
3758 // problems where the output may not be the same as the first operand.
3759 if (mul->InputAt(1)->IsIntConstant()) {
3760 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3761 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3762 } else if (second.IsRegister()) {
3763 DCHECK(first.Equals(out));
3764 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3765 } else {
3766 DCHECK(second.IsStackSlot());
3767 DCHECK(first.Equals(out));
3768 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3769 }
3770 break;
3771
3772 case DataType::Type::kInt64: {
3773 Register in1_hi = first.AsRegisterPairHigh<Register>();
3774 Register in1_lo = first.AsRegisterPairLow<Register>();
3775 Register eax = locations->GetTemp(0).AsRegister<Register>();
3776 Register edx = locations->GetTemp(1).AsRegister<Register>();
3777
3778 DCHECK_EQ(EAX, eax);
3779 DCHECK_EQ(EDX, edx);
3780
3781 // input: in1 - 64 bits, in2 - 64 bits.
3782 // output: in1
3783 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3784 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3785 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
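// Editorial worked example (illustrative only): with in1 = 0x00000002'00000003
// and in2 = 0x00000004'00000005:
//   in1.lo * in2.hi + in1.hi * in2.lo = 3 * 4 + 2 * 5 = 22
//   in1.lo * in2.lo                   = 3 * 5 = 15 (no carry into bits 63:32 here)
// so the low 64 bits of the product are 22 * 2^32 + 15 = 0x00000016'0000000F.
// The 2^64 term (in1.hi * in2.hi) is discarded, as required for a long multiply.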
3786 if (second.IsConstant()) {
3787 DCHECK(second.GetConstant()->IsLongConstant());
3788
3789 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3790 int32_t low_value = Low32Bits(value);
3791 int32_t high_value = High32Bits(value);
3792 Immediate low(low_value);
3793 Immediate high(high_value);
3794
3795 __ movl(eax, high);
3796 // eax <- in1.lo * in2.hi
3797 __ imull(eax, in1_lo);
3798 // in1.hi <- in1.hi * in2.lo
3799 __ imull(in1_hi, low);
3800 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3801 __ addl(in1_hi, eax);
3802 // move in2_lo to eax to prepare for double precision
3803 __ movl(eax, low);
3804 // edx:eax <- in1.lo * in2.lo
3805 __ mull(in1_lo);
3806 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3807 __ addl(in1_hi, edx);
3808 // in1.lo <- (in1.lo * in2.lo)[31:0];
3809 __ movl(in1_lo, eax);
3810 } else if (second.IsRegisterPair()) {
3811 Register in2_hi = second.AsRegisterPairHigh<Register>();
3812 Register in2_lo = second.AsRegisterPairLow<Register>();
3813
3814 __ movl(eax, in2_hi);
3815 // eax <- in1.lo * in2.hi
3816 __ imull(eax, in1_lo);
3817 // in1.hi <- in1.hi * in2.lo
3818 __ imull(in1_hi, in2_lo);
3819 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3820 __ addl(in1_hi, eax);
3821 // move in1_lo to eax to prepare for double precision
3822 __ movl(eax, in1_lo);
3823 // edx:eax <- in1.lo * in2.lo
3824 __ mull(in2_lo);
3825 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3826 __ addl(in1_hi, edx);
3827 // in1.lo <- (in1.lo * in2.lo)[31:0];
3828 __ movl(in1_lo, eax);
3829 } else {
3830 DCHECK(second.IsDoubleStackSlot()) << second;
3831 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3832 Address in2_lo(ESP, second.GetStackIndex());
3833
3834 __ movl(eax, in2_hi);
3835 // eax <- in1.lo * in2.hi
3836 __ imull(eax, in1_lo);
3837 // in1.hi <- in1.hi * in2.lo
3838 __ imull(in1_hi, in2_lo);
3839 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3840 __ addl(in1_hi, eax);
3841 // move in1_lo to eax to prepare for double precision
3842 __ movl(eax, in1_lo);
3843 // edx:eax <- in1.lo * in2.lo
3844 __ mull(in2_lo);
3845 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3846 __ addl(in1_hi, edx);
3847 // in1.lo <- (in1.lo * in2.lo)[31:0];
3848 __ movl(in1_lo, eax);
3849 }
3850
3851 break;
3852 }
3853
3854 case DataType::Type::kFloat32: {
3855 DCHECK(first.Equals(locations->Out()));
3856 if (second.IsFpuRegister()) {
3857 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3858 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3859 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3860 DCHECK(const_area->IsEmittedAtUseSite());
3861 __ mulss(first.AsFpuRegister<XmmRegister>(),
3862 codegen_->LiteralFloatAddress(
3863 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3864 const_area->GetBaseMethodAddress(),
3865 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3866 } else {
3867 DCHECK(second.IsStackSlot());
3868 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3869 }
3870 break;
3871 }
3872
3873 case DataType::Type::kFloat64: {
3874 DCHECK(first.Equals(locations->Out()));
3875 if (second.IsFpuRegister()) {
3876 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3877 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3878 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3879 DCHECK(const_area->IsEmittedAtUseSite());
3880 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3881 codegen_->LiteralDoubleAddress(
3882 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3883 const_area->GetBaseMethodAddress(),
3884 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3885 } else {
3886 DCHECK(second.IsDoubleStackSlot());
3887 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3888 }
3889 break;
3890 }
3891
3892 default:
3893 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3894 }
3895 }
3896
3897 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3898 uint32_t temp_offset,
3899 uint32_t stack_adjustment,
3900 bool is_fp,
3901 bool is_wide) {
3902 if (source.IsStackSlot()) {
3903 DCHECK(!is_wide);
3904 if (is_fp) {
3905 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3906 } else {
3907 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3908 }
3909 } else if (source.IsDoubleStackSlot()) {
3910 DCHECK(is_wide);
3911 if (is_fp) {
3912 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3913 } else {
3914 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3915 }
3916 } else {
3917 // Write the value to the temporary location on the stack and load to FP stack.
3918 if (!is_wide) {
3919 Location stack_temp = Location::StackSlot(temp_offset);
3920 codegen_->Move32(stack_temp, source);
3921 if (is_fp) {
3922 __ flds(Address(ESP, temp_offset));
3923 } else {
3924 __ filds(Address(ESP, temp_offset));
3925 }
3926 } else {
3927 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3928 codegen_->Move64(stack_temp, source);
3929 if (is_fp) {
3930 __ fldl(Address(ESP, temp_offset));
3931 } else {
3932 __ fildl(Address(ESP, temp_offset));
3933 }
3934 }
3935 }
3936 }
3937
3938 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3939 DataType::Type type = rem->GetResultType();
3940 bool is_float = type == DataType::Type::kFloat32;
3941 size_t elem_size = DataType::Size(type);
3942 LocationSummary* locations = rem->GetLocations();
3943 Location first = locations->InAt(0);
3944 Location second = locations->InAt(1);
3945 Location out = locations->Out();
3946
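// Editorial note: SSE has no remainder instruction, so the x87 FPREM
// instruction is used here. FPREM computes a partial remainder with
// round-toward-zero semantics, which matches the Java '%' operator for
// floating-point values (unlike FPREM1, which implements the IEEE 754
// round-to-nearest remainder).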
3947 // Create stack space for 2 elements.
3948 // TODO: enhance register allocator to ask for stack temporaries.
3949 codegen_->IncreaseFrame(2 * elem_size);
3950
3951 // Load the values to the FP stack in reverse order, using temporaries if needed.
3952 const bool is_wide = !is_float;
3953 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3954 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3955
3956 // Loop doing FPREM until the reduction is complete; FPREM only performs a
3956 // partial reduction of the exponent difference per iteration.
3957 NearLabel retry;
3958 __ Bind(&retry);
3959 __ fprem();
3960
3961 // Move FP status to AX.
3962 __ fstsw();
3963
3964 // Check whether the argument reduction is complete. This is signaled by the
3965 // C2 FPU status flag being cleared (0).
3966 __ andl(EAX, Immediate(kC2ConditionMask));
3967 __ j(kNotEqual, &retry);
3968
3969 // We have settled on the final value. Retrieve it into an XMM register.
3970 // Store FP top of stack to real stack.
3971 if (is_float) {
3972 __ fsts(Address(ESP, 0));
3973 } else {
3974 __ fstl(Address(ESP, 0));
3975 }
3976
3977 // Pop the 2 items from the FP stack.
3978 __ fucompp();
3979
3980 // Load the value from the stack into an XMM register.
3981 DCHECK(out.IsFpuRegister()) << out;
3982 if (is_float) {
3983 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3984 } else {
3985 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3986 }
3987
3988 // And remove the temporary stack space we allocated.
3989 codegen_->DecreaseFrame(2 * elem_size);
3990 }
3991
3992
3993 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3994 DCHECK(instruction->IsDiv() || instruction->IsRem());
3995
3996 LocationSummary* locations = instruction->GetLocations();
3997 DCHECK(locations->InAt(1).IsConstant());
3998 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3999
4000 Register out_register = locations->Out().AsRegister<Register>();
4001 Register input_register = locations->InAt(0).AsRegister<Register>();
4002 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4003
4004 DCHECK(imm == 1 || imm == -1);
4005
4006 if (instruction->IsRem()) {
4007 __ xorl(out_register, out_register);
4008 } else {
4009 __ movl(out_register, input_register);
4010 if (imm == -1) {
4011 __ negl(out_register);
4012 }
4013 }
4014 }
4015
4016 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4017 LocationSummary* locations = instruction->GetLocations();
4018 Location second = locations->InAt(1);
4019
4020 Register out = locations->Out().AsRegister<Register>();
4021 Register numerator = locations->InAt(0).AsRegister<Register>();
4022
4023 int32_t imm = Int64FromConstant(second.GetConstant());
4024 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4025 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4026
4027 Register tmp = locations->GetTemp(0).AsRegister<Register>();
4028 NearLabel done;
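// Editorial worked example (illustrative only) for rem by 4 (abs_imm = 4, mask = 3):
//   numerator =  7: out = 7 & 3 = 1; non-zero, but numerator >= 0, so out stays 1.
//   numerator = -7: out = -7 & 3 = 1; tmp = 1 + ~3 = 1 - 4 = -3; numerator < 0,
//                   so out becomes -3, matching Java semantics (-7 % 4 == -3).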
4029 __ movl(out, numerator);
4030 __ andl(out, Immediate(abs_imm-1));
4031 __ j(Condition::kZero, &done);
4032 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4033 __ testl(numerator, numerator);
4034 __ cmovl(Condition::kLess, out, tmp);
4035 __ Bind(&done);
4036 }
4037
4038 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4039 LocationSummary* locations = instruction->GetLocations();
4040
4041 Register out_register = locations->Out().AsRegister<Register>();
4042 Register input_register = locations->InAt(0).AsRegister<Register>();
4043 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4044 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4045 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4046
4047 Register num = locations->GetTemp(0).AsRegister<Register>();
4048
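// Editorial worked example (illustrative only) for div by 4 (abs_imm = 4, shift = 2):
//   input = -7: num = -7 + 3 = -4; input < 0 so the biased value is kept;
//               -4 >> 2 (arithmetic) = -1, matching round-toward-zero (-7 / 4 == -1).
//   input =  7: the CMOV replaces num with the original input (7); 7 >> 2 = 1.
// Shifting the unbiased negative value would give -7 >> 2 = -2, i.e. rounding
// toward negative infinity instead of toward zero.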
4049 __ leal(num, Address(input_register, abs_imm - 1));
4050 __ testl(input_register, input_register);
4051 __ cmovl(kGreaterEqual, num, input_register);
4052 int shift = CTZ(imm);
4053 __ sarl(num, Immediate(shift));
4054
4055 if (imm < 0) {
4056 __ negl(num);
4057 }
4058
4059 __ movl(out_register, num);
4060 }
4061
4062 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4063 DCHECK(instruction->IsDiv() || instruction->IsRem());
4064
4065 LocationSummary* locations = instruction->GetLocations();
4066 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4067
4068 Register eax = locations->InAt(0).AsRegister<Register>();
4069 Register out = locations->Out().AsRegister<Register>();
4070 Register num;
4071 Register edx;
4072
4073 if (instruction->IsDiv()) {
4074 edx = locations->GetTemp(0).AsRegister<Register>();
4075 num = locations->GetTemp(1).AsRegister<Register>();
4076 } else {
4077 edx = locations->Out().AsRegister<Register>();
4078 num = locations->GetTemp(0).AsRegister<Register>();
4079 }
4080
4081 DCHECK_EQ(EAX, eax);
4082 DCHECK_EQ(EDX, edx);
4083 if (instruction->IsDiv()) {
4084 DCHECK_EQ(EAX, out);
4085 } else {
4086 DCHECK_EQ(EDX, out);
4087 }
4088
4089 int64_t magic;
4090 int shift;
4091 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4092
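// Editorial note: this is the classic "magic number" division (Hacker's Delight
// style): the quotient is the high 32 bits of magic * numerator, shifted right
// by `shift`, with a corrective add (or subtract) of the numerator when the
// signs of the divisor and the magic constant differ, and a final "+1 if
// negative" adjustment so the result rounds toward zero. Illustrative example
// (constants assumed, not taken from CalculateMagicAndShiftForDivRem): dividing
// 100 by 7 with magic = 0x92492493 and shift = 2:
//   high32(0x92492493 * 100) = -43; -43 + 100 = 57; 57 >> 2 = 14; sign bit 0,
// so the quotient is 14.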
4093 // Save the numerator.
4094 __ movl(num, eax);
4095
4096 // EAX = magic
4097 __ movl(eax, Immediate(magic));
4098
4099 // EDX:EAX = magic * numerator
4100 __ imull(num);
4101
4102 if (imm > 0 && magic < 0) {
4103 // EDX += num
4104 __ addl(edx, num);
4105 } else if (imm < 0 && magic > 0) {
4106 __ subl(edx, num);
4107 }
4108
4109 // Shift if needed.
4110 if (shift != 0) {
4111 __ sarl(edx, Immediate(shift));
4112 }
4113
4114 // EDX += 1 if EDX < 0
4115 __ movl(eax, edx);
4116 __ shrl(edx, Immediate(31));
4117 __ addl(edx, eax);
4118
4119 if (instruction->IsRem()) {
4120 __ movl(eax, num);
4121 __ imull(edx, Immediate(imm));
4122 __ subl(eax, edx);
4123 __ movl(edx, eax);
4124 } else {
4125 __ movl(eax, edx);
4126 }
4127 }
4128
4129 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4130 DCHECK(instruction->IsDiv() || instruction->IsRem());
4131
4132 LocationSummary* locations = instruction->GetLocations();
4133 Location out = locations->Out();
4134 Location first = locations->InAt(0);
4135 Location second = locations->InAt(1);
4136 bool is_div = instruction->IsDiv();
4137
4138 switch (instruction->GetResultType()) {
4139 case DataType::Type::kInt32: {
4140 DCHECK_EQ(EAX, first.AsRegister<Register>());
4141 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4142
4143 if (second.IsConstant()) {
4144 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4145
4146 if (imm == 0) {
4147 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
4148 } else if (imm == 1 || imm == -1) {
4149 DivRemOneOrMinusOne(instruction);
4150 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4151 if (is_div) {
4152 DivByPowerOfTwo(instruction->AsDiv());
4153 } else {
4154 RemByPowerOfTwo(instruction->AsRem());
4155 }
4156 } else {
4157 DCHECK(imm <= -2 || imm >= 2);
4158 GenerateDivRemWithAnyConstant(instruction);
4159 }
4160 } else {
4161 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4162 instruction, out.AsRegister<Register>(), is_div);
4163 codegen_->AddSlowPath(slow_path);
4164
4165 Register second_reg = second.AsRegister<Register>();
4166 // 0x80000000 / -1 triggers an arithmetic exception!
4167 // Dividing by -1 is actually a negation, and -0x80000000 == 0x80000000 in
4168 // two's complement, so it is safe to just use negl instead of more complex comparisons.
4169
4170 __ cmpl(second_reg, Immediate(-1));
4171 __ j(kEqual, slow_path->GetEntryLabel());
4172
4173 // edx:eax <- sign-extended of eax
4174 __ cdq();
4175 // eax = quotient, edx = remainder
4176 __ idivl(second_reg);
4177 __ Bind(slow_path->GetExitLabel());
4178 }
4179 break;
4180 }
4181
4182 case DataType::Type::kInt64: {
4183 InvokeRuntimeCallingConvention calling_convention;
4184 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4185 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4186 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4187 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4188 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4189 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4190
4191 if (is_div) {
4192 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4193 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4194 } else {
4195 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4196 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4197 }
4198 break;
4199 }
4200
4201 default:
4202 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4203 }
4204 }
4205
4206 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4207 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4208 ? LocationSummary::kCallOnMainOnly
4209 : LocationSummary::kNoCall;
4210 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4211
4212 switch (div->GetResultType()) {
4213 case DataType::Type::kInt32: {
4214 locations->SetInAt(0, Location::RegisterLocation(EAX));
4215 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4216 locations->SetOut(Location::SameAsFirstInput());
4217 // Intel uses edx:eax as the dividend.
4218 locations->AddTemp(Location::RegisterLocation(EDX));
4219 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4220 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
4221 // output and request another temp.
4222 if (div->InputAt(1)->IsIntConstant()) {
4223 locations->AddTemp(Location::RequiresRegister());
4224 }
4225 break;
4226 }
4227 case DataType::Type::kInt64: {
4228 InvokeRuntimeCallingConvention calling_convention;
4229 locations->SetInAt(0, Location::RegisterPairLocation(
4230 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4231 locations->SetInAt(1, Location::RegisterPairLocation(
4232 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4233 // Runtime helper puts the result in EAX, EDX.
4234 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4235 break;
4236 }
4237 case DataType::Type::kFloat32:
4238 case DataType::Type::kFloat64: {
4239 locations->SetInAt(0, Location::RequiresFpuRegister());
4240 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4241 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4242 } else if (div->InputAt(1)->IsConstant()) {
4243 locations->SetInAt(1, Location::RequiresFpuRegister());
4244 } else {
4245 locations->SetInAt(1, Location::Any());
4246 }
4247 locations->SetOut(Location::SameAsFirstInput());
4248 break;
4249 }
4250
4251 default:
4252 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4253 }
4254 }
4255
VisitDiv(HDiv * div)4256 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4257 LocationSummary* locations = div->GetLocations();
4258 Location first = locations->InAt(0);
4259 Location second = locations->InAt(1);
4260
4261 switch (div->GetResultType()) {
4262 case DataType::Type::kInt32:
4263 case DataType::Type::kInt64: {
4264 GenerateDivRemIntegral(div);
4265 break;
4266 }
4267
4268 case DataType::Type::kFloat32: {
4269 if (second.IsFpuRegister()) {
4270 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4271 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4272 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4273 DCHECK(const_area->IsEmittedAtUseSite());
4274 __ divss(first.AsFpuRegister<XmmRegister>(),
4275 codegen_->LiteralFloatAddress(
4276 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4277 const_area->GetBaseMethodAddress(),
4278 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4279 } else {
4280 DCHECK(second.IsStackSlot());
4281 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4282 }
4283 break;
4284 }
4285
4286 case DataType::Type::kFloat64: {
4287 if (second.IsFpuRegister()) {
4288 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4289 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4290 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4291 DCHECK(const_area->IsEmittedAtUseSite());
4292 __ divsd(first.AsFpuRegister<XmmRegister>(),
4293 codegen_->LiteralDoubleAddress(
4294 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4295 const_area->GetBaseMethodAddress(),
4296 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4297 } else {
4298 DCHECK(second.IsDoubleStackSlot());
4299 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4300 }
4301 break;
4302 }
4303
4304 default:
4305 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4306 }
4307 }
4308
VisitRem(HRem * rem)4309 void LocationsBuilderX86::VisitRem(HRem* rem) {
4310 DataType::Type type = rem->GetResultType();
4311
4312 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4313 ? LocationSummary::kCallOnMainOnly
4314 : LocationSummary::kNoCall;
4315 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4316
4317 switch (type) {
4318 case DataType::Type::kInt32: {
4319 locations->SetInAt(0, Location::RegisterLocation(EAX));
4320 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4321 locations->SetOut(Location::RegisterLocation(EDX));
4322 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4323 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
4324 // output and request another temp.
4325 if (rem->InputAt(1)->IsIntConstant()) {
4326 locations->AddTemp(Location::RequiresRegister());
4327 }
4328 break;
4329 }
4330 case DataType::Type::kInt64: {
4331 InvokeRuntimeCallingConvention calling_convention;
4332 locations->SetInAt(0, Location::RegisterPairLocation(
4333 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4334 locations->SetInAt(1, Location::RegisterPairLocation(
4335 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4336 // Runtime helper puts the result in EAX, EDX.
4337 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4338 break;
4339 }
4340 case DataType::Type::kFloat64:
4341 case DataType::Type::kFloat32: {
4342 locations->SetInAt(0, Location::Any());
4343 locations->SetInAt(1, Location::Any());
4344 locations->SetOut(Location::RequiresFpuRegister());
4345 locations->AddTemp(Location::RegisterLocation(EAX));
4346 break;
4347 }
4348
4349 default:
4350 LOG(FATAL) << "Unexpected rem type " << type;
4351 }
4352 }
4353
VisitRem(HRem * rem)4354 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4355 DataType::Type type = rem->GetResultType();
4356 switch (type) {
4357 case DataType::Type::kInt32:
4358 case DataType::Type::kInt64: {
4359 GenerateDivRemIntegral(rem);
4360 break;
4361 }
4362 case DataType::Type::kFloat32:
4363 case DataType::Type::kFloat64: {
4364 GenerateRemFP(rem);
4365 break;
4366 }
4367 default:
4368 LOG(FATAL) << "Unexpected rem type " << type;
4369 }
4370 }
4371
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4372 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4373 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4374 switch (minmax->GetResultType()) {
4375 case DataType::Type::kInt32:
4376 locations->SetInAt(0, Location::RequiresRegister());
4377 locations->SetInAt(1, Location::RequiresRegister());
4378 locations->SetOut(Location::SameAsFirstInput());
4379 break;
4380 case DataType::Type::kInt64:
4381 locations->SetInAt(0, Location::RequiresRegister());
4382 locations->SetInAt(1, Location::RequiresRegister());
4383 locations->SetOut(Location::SameAsFirstInput());
4384 // Register to use to perform a long subtract to set cc.
4385 locations->AddTemp(Location::RequiresRegister());
4386 break;
4387 case DataType::Type::kFloat32:
4388 locations->SetInAt(0, Location::RequiresFpuRegister());
4389 locations->SetInAt(1, Location::RequiresFpuRegister());
4390 locations->SetOut(Location::SameAsFirstInput());
4391 locations->AddTemp(Location::RequiresRegister());
4392 break;
4393 case DataType::Type::kFloat64:
4394 locations->SetInAt(0, Location::RequiresFpuRegister());
4395 locations->SetInAt(1, Location::RequiresFpuRegister());
4396 locations->SetOut(Location::SameAsFirstInput());
4397 break;
4398 default:
4399 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4400 }
4401 }
4402
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4403 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4404 bool is_min,
4405 DataType::Type type) {
4406 Location op1_loc = locations->InAt(0);
4407 Location op2_loc = locations->InAt(1);
4408
4409 // Shortcut for same input locations.
4410 if (op1_loc.Equals(op2_loc)) {
4411 // Can return immediately, as op1_loc == out_loc.
4412 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4413 // a copy here.
4414 DCHECK(locations->Out().Equals(op1_loc));
4415 return;
4416 }
4417
4418 if (type == DataType::Type::kInt64) {
4419 // Need to perform a subtract to get the sign right.
4420 // op1 is already in the same location as the output.
4421 Location output = locations->Out();
4422 Register output_lo = output.AsRegisterPairLow<Register>();
4423 Register output_hi = output.AsRegisterPairHigh<Register>();
4424
4425 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4426 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4427
4428 // The comparison is performed by subtracting the second operand from
4429 // the first operand and then setting the status flags in the same
4430     // manner as the SUB instruction.
4431 __ cmpl(output_lo, op2_lo);
4432
4433 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4434 Register temp = locations->GetTemp(0).AsRegister<Register>();
4435 __ movl(temp, output_hi);
4436 __ sbbl(temp, op2_hi);
4437
4438 // Now the condition code is correct.
4439 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4440 __ cmovl(cond, output_lo, op2_lo);
4441 __ cmovl(cond, output_hi, op2_hi);
4442 } else {
4443 DCHECK_EQ(type, DataType::Type::kInt32);
4444 Register out = locations->Out().AsRegister<Register>();
4445 Register op2 = op2_loc.AsRegister<Register>();
4446
4447 // (out := op1)
4448 // out <=? op2
4449 // if out is min jmp done
4450 // out := op2
4451 // done:
4452
4453 __ cmpl(out, op2);
4454 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4455 __ cmovl(cond, out, op2);
4456 }
4457 }
4458
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4459 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4460 bool is_min,
4461 DataType::Type type) {
4462 Location op1_loc = locations->InAt(0);
4463 Location op2_loc = locations->InAt(1);
4464 Location out_loc = locations->Out();
4465 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4466
4467 // Shortcut for same input locations.
4468 if (op1_loc.Equals(op2_loc)) {
4469 DCHECK(out_loc.Equals(op1_loc));
4470 return;
4471 }
4472
4473 // (out := op1)
4474 // out <=? op2
4475 // if Nan jmp Nan_label
4476 // if out is min jmp done
4477 // if op2 is min jmp op2_label
4478 // handle -0/+0
4479 // jmp done
4480 // Nan_label:
4481 // out := NaN
4482 // op2_label:
4483 // out := op2
4484 // done:
4485 //
4486 // This removes one jmp, but needs to copy one input (op1) to out.
4487 //
4488 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4489
4490 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4491
4492 NearLabel nan, done, op2_label;
4493 if (type == DataType::Type::kFloat64) {
4494 __ ucomisd(out, op2);
4495 } else {
4496 DCHECK_EQ(type, DataType::Type::kFloat32);
4497 __ ucomiss(out, op2);
4498 }
4499
4500 __ j(Condition::kParityEven, &nan);
4501
4502 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4503 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4504
4505 // Handle 0.0/-0.0.
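// The operands compared equal. For equal non-zero values the OR/AND below is a no-op;
// for +0.0 vs -0.0 it picks the correctly signed zero: OR yields -0.0 (min), AND yields +0.0 (max).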
4506 if (is_min) {
4507 if (type == DataType::Type::kFloat64) {
4508 __ orpd(out, op2);
4509 } else {
4510 __ orps(out, op2);
4511 }
4512 } else {
4513 if (type == DataType::Type::kFloat64) {
4514 __ andpd(out, op2);
4515 } else {
4516 __ andps(out, op2);
4517 }
4518 }
4519 __ jmp(&done);
4520
4521 // NaN handling.
4522 __ Bind(&nan);
4523 if (type == DataType::Type::kFloat64) {
4524 // TODO: Use a constant from the constant table (requires extra input).
4525 __ LoadLongConstant(out, kDoubleNaN);
4526 } else {
4527 Register constant = locations->GetTemp(0).AsRegister<Register>();
4528 __ movl(constant, Immediate(kFloatNaN));
4529 __ movd(out, constant);
4530 }
4531 __ jmp(&done);
4532
4533 // out := op2;
4534 __ Bind(&op2_label);
4535 if (type == DataType::Type::kFloat64) {
4536 __ movsd(out, op2);
4537 } else {
4538 __ movss(out, op2);
4539 }
4540
4541 // Done.
4542 __ Bind(&done);
4543 }
4544
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4545 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4546 DataType::Type type = minmax->GetResultType();
4547 switch (type) {
4548 case DataType::Type::kInt32:
4549 case DataType::Type::kInt64:
4550 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4551 break;
4552 case DataType::Type::kFloat32:
4553 case DataType::Type::kFloat64:
4554 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4555 break;
4556 default:
4557 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4558 }
4559 }
4560
VisitMin(HMin * min)4561 void LocationsBuilderX86::VisitMin(HMin* min) {
4562 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4563 }
4564
VisitMin(HMin * min)4565 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4566 GenerateMinMax(min, /*is_min*/ true);
4567 }
4568
VisitMax(HMax * max)4569 void LocationsBuilderX86::VisitMax(HMax* max) {
4570 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4571 }
4572
VisitMax(HMax * max)4573 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4574 GenerateMinMax(max, /*is_min*/ false);
4575 }
4576
VisitAbs(HAbs * abs)4577 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4578 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4579 switch (abs->GetResultType()) {
4580 case DataType::Type::kInt32:
4581 locations->SetInAt(0, Location::RegisterLocation(EAX));
4582 locations->SetOut(Location::SameAsFirstInput());
4583 locations->AddTemp(Location::RegisterLocation(EDX));
4584 break;
4585 case DataType::Type::kInt64:
4586 locations->SetInAt(0, Location::RequiresRegister());
4587 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4588 locations->AddTemp(Location::RequiresRegister());
4589 break;
4590 case DataType::Type::kFloat32:
4591 locations->SetInAt(0, Location::RequiresFpuRegister());
4592 locations->SetOut(Location::SameAsFirstInput());
4593 locations->AddTemp(Location::RequiresFpuRegister());
4594 locations->AddTemp(Location::RequiresRegister());
4595 break;
4596 case DataType::Type::kFloat64:
4597 locations->SetInAt(0, Location::RequiresFpuRegister());
4598 locations->SetOut(Location::SameAsFirstInput());
4599 locations->AddTemp(Location::RequiresFpuRegister());
4600 break;
4601 default:
4602 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4603 }
4604 }
4605
VisitAbs(HAbs * abs)4606 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4607 LocationSummary* locations = abs->GetLocations();
4608 switch (abs->GetResultType()) {
4609 case DataType::Type::kInt32: {
4610 Register out = locations->Out().AsRegister<Register>();
4611 DCHECK_EQ(out, EAX);
4612 Register temp = locations->GetTemp(0).AsRegister<Register>();
4613 DCHECK_EQ(temp, EDX);
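// Branchless abs(x) = (x ^ sign) - sign, where sign = x >> 31 (all ones for negative x, zero
// otherwise). E.g. for EAX = -5: EDX = -1, -5 ^ -1 = 4, 4 - (-1) = 5.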
4614 // Sign extend EAX into EDX.
4615 __ cdq();
4616 // XOR EAX with sign.
4617 __ xorl(EAX, EDX);
4618 // Subtract out sign to correct.
4619 __ subl(EAX, EDX);
4620 // The result is in EAX.
4621 break;
4622 }
4623 case DataType::Type::kInt64: {
4624 Location input = locations->InAt(0);
4625 Register input_lo = input.AsRegisterPairLow<Register>();
4626 Register input_hi = input.AsRegisterPairHigh<Register>();
4627 Location output = locations->Out();
4628 Register output_lo = output.AsRegisterPairLow<Register>();
4629 Register output_hi = output.AsRegisterPairHigh<Register>();
4630 Register temp = locations->GetTemp(0).AsRegister<Register>();
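// Same branchless trick as the 32-bit case: (x ^ sign) - sign, with the sign word replicated
// into both halves and the final subtraction carried from low to high via sbbl.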
4631 // Compute the sign into the temporary.
4632 __ movl(temp, input_hi);
4633 __ sarl(temp, Immediate(31));
4634 // Store the sign into the output.
4635 __ movl(output_lo, temp);
4636 __ movl(output_hi, temp);
4637 // XOR the input to the output.
4638 __ xorl(output_lo, input_lo);
4639 __ xorl(output_hi, input_hi);
4640 // Subtract the sign.
4641 __ subl(output_lo, temp);
4642 __ sbbl(output_hi, temp);
4643 break;
4644 }
4645 case DataType::Type::kFloat32: {
4646 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4647 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4648 Register constant = locations->GetTemp(1).AsRegister<Register>();
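// |x| for a float is obtained by clearing the sign bit (bit 31); 0x7FFFFFFF is that mask.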
4649 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4650 __ movd(temp, constant);
4651 __ andps(out, temp);
4652 break;
4653 }
4654 case DataType::Type::kFloat64: {
4655 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4656 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4657 // TODO: Use a constant from the constant table (requires extra input).
4658 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4659 __ andpd(out, temp);
4660 break;
4661 }
4662 default:
4663 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4664 }
4665 }
4666
VisitDivZeroCheck(HDivZeroCheck * instruction)4667 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4668 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4669 switch (instruction->GetType()) {
4670 case DataType::Type::kBool:
4671 case DataType::Type::kUint8:
4672 case DataType::Type::kInt8:
4673 case DataType::Type::kUint16:
4674 case DataType::Type::kInt16:
4675 case DataType::Type::kInt32: {
4676 locations->SetInAt(0, Location::Any());
4677 break;
4678 }
4679 case DataType::Type::kInt64: {
4680 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4681 if (!instruction->IsConstant()) {
4682 locations->AddTemp(Location::RequiresRegister());
4683 }
4684 break;
4685 }
4686 default:
4687 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4688 }
4689 }
4690
VisitDivZeroCheck(HDivZeroCheck * instruction)4691 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4692 SlowPathCode* slow_path =
4693 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4694 codegen_->AddSlowPath(slow_path);
4695
4696 LocationSummary* locations = instruction->GetLocations();
4697 Location value = locations->InAt(0);
4698
4699 switch (instruction->GetType()) {
4700 case DataType::Type::kBool:
4701 case DataType::Type::kUint8:
4702 case DataType::Type::kInt8:
4703 case DataType::Type::kUint16:
4704 case DataType::Type::kInt16:
4705 case DataType::Type::kInt32: {
4706 if (value.IsRegister()) {
4707 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4708 __ j(kEqual, slow_path->GetEntryLabel());
4709 } else if (value.IsStackSlot()) {
4710 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4711 __ j(kEqual, slow_path->GetEntryLabel());
4712 } else {
4713 DCHECK(value.IsConstant()) << value;
4714 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4715 __ jmp(slow_path->GetEntryLabel());
4716 }
4717 }
4718 break;
4719 }
4720 case DataType::Type::kInt64: {
4721 if (value.IsRegisterPair()) {
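// A 64-bit value is zero iff the bitwise OR of its two halves is zero.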
4722 Register temp = locations->GetTemp(0).AsRegister<Register>();
4723 __ movl(temp, value.AsRegisterPairLow<Register>());
4724 __ orl(temp, value.AsRegisterPairHigh<Register>());
4725 __ j(kEqual, slow_path->GetEntryLabel());
4726 } else {
4727 DCHECK(value.IsConstant()) << value;
4728 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4729 __ jmp(slow_path->GetEntryLabel());
4730 }
4731 }
4732 break;
4733 }
4734 default:
4735       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4736 }
4737 }
4738
HandleShift(HBinaryOperation * op)4739 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4740 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4741
4742 LocationSummary* locations =
4743 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4744
4745 switch (op->GetResultType()) {
4746 case DataType::Type::kInt32:
4747 case DataType::Type::kInt64: {
4748 // Can't have Location::Any() and output SameAsFirstInput()
4749 locations->SetInAt(0, Location::RequiresRegister());
4750 // The shift count needs to be in CL or a constant.
4751 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4752 locations->SetOut(Location::SameAsFirstInput());
4753 break;
4754 }
4755 default:
4756 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4757 }
4758 }
4759
HandleShift(HBinaryOperation * op)4760 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4761 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4762
4763 LocationSummary* locations = op->GetLocations();
4764 Location first = locations->InAt(0);
4765 Location second = locations->InAt(1);
4766 DCHECK(first.Equals(locations->Out()));
4767
4768 switch (op->GetResultType()) {
4769 case DataType::Type::kInt32: {
4770 DCHECK(first.IsRegister());
4771 Register first_reg = first.AsRegister<Register>();
4772 if (second.IsRegister()) {
4773 Register second_reg = second.AsRegister<Register>();
4774 DCHECK_EQ(ECX, second_reg);
4775 if (op->IsShl()) {
4776 __ shll(first_reg, second_reg);
4777 } else if (op->IsShr()) {
4778 __ sarl(first_reg, second_reg);
4779 } else {
4780 __ shrl(first_reg, second_reg);
4781 }
4782 } else {
4783 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4784 if (shift == 0) {
4785 return;
4786 }
4787 Immediate imm(shift);
4788 if (op->IsShl()) {
4789 __ shll(first_reg, imm);
4790 } else if (op->IsShr()) {
4791 __ sarl(first_reg, imm);
4792 } else {
4793 __ shrl(first_reg, imm);
4794 }
4795 }
4796 break;
4797 }
4798 case DataType::Type::kInt64: {
4799 if (second.IsRegister()) {
4800 Register second_reg = second.AsRegister<Register>();
4801 DCHECK_EQ(ECX, second_reg);
4802 if (op->IsShl()) {
4803 GenerateShlLong(first, second_reg);
4804 } else if (op->IsShr()) {
4805 GenerateShrLong(first, second_reg);
4806 } else {
4807 GenerateUShrLong(first, second_reg);
4808 }
4809 } else {
4810 // Shift by a constant.
4811 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4812 // Nothing to do if the shift is 0, as the input is already the output.
4813 if (shift != 0) {
4814 if (op->IsShl()) {
4815 GenerateShlLong(first, shift);
4816 } else if (op->IsShr()) {
4817 GenerateShrLong(first, shift);
4818 } else {
4819 GenerateUShrLong(first, shift);
4820 }
4821 }
4822 }
4823 break;
4824 }
4825 default:
4826 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4827 }
4828 }
4829
GenerateShlLong(const Location & loc,int shift)4830 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4831 Register low = loc.AsRegisterPairLow<Register>();
4832 Register high = loc.AsRegisterPairHigh<Register>();
4833 if (shift == 1) {
4834 // This is just an addition.
4835 __ addl(low, low);
4836 __ adcl(high, high);
4837 } else if (shift == 32) {
4838 // Shift by 32 is easy. High gets low, and low gets 0.
4839 codegen_->EmitParallelMoves(
4840 loc.ToLow(),
4841 loc.ToHigh(),
4842 DataType::Type::kInt32,
4843 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4844 loc.ToLow(),
4845 DataType::Type::kInt32);
4846 } else if (shift > 32) {
4847 // Low part becomes 0. High part is low part << (shift-32).
4848 __ movl(high, low);
4849 __ shll(high, Immediate(shift - 32));
4850 __ xorl(low, low);
4851 } else {
4852 // Between 1 and 31.
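// shld shifts `high` left and fills the vacated low bits from the top bits of `low`, so
// together with the plain shll on `low` the pair behaves like a single 64-bit left shift.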
4853 __ shld(high, low, Immediate(shift));
4854 __ shll(low, Immediate(shift));
4855 }
4856 }
4857
GenerateShlLong(const Location & loc,Register shifter)4858 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4859 NearLabel done;
4860 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4861 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
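// x86 masks 32-bit shift counts to 5 bits, so for counts of 32..63 (bit 5 of the shifter set)
// we still have to move the low word into the high word and clear the low word manually.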
4862 __ testl(shifter, Immediate(32));
4863 __ j(kEqual, &done);
4864 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4865 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4866 __ Bind(&done);
4867 }
4868
GenerateShrLong(const Location & loc,int shift)4869 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4870 Register low = loc.AsRegisterPairLow<Register>();
4871 Register high = loc.AsRegisterPairHigh<Register>();
4872 if (shift == 32) {
4873 // Need to copy the sign.
4874 DCHECK_NE(low, high);
4875 __ movl(low, high);
4876 __ sarl(high, Immediate(31));
4877 } else if (shift > 32) {
4878 DCHECK_NE(low, high);
4879 // High part becomes sign. Low part is shifted by shift - 32.
4880 __ movl(low, high);
4881 __ sarl(high, Immediate(31));
4882 __ sarl(low, Immediate(shift - 32));
4883 } else {
4884 // Between 1 and 31.
4885 __ shrd(low, high, Immediate(shift));
4886 __ sarl(high, Immediate(shift));
4887 }
4888 }
4889
GenerateShrLong(const Location & loc,Register shifter)4890 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4891 NearLabel done;
4892 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4893 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4894 __ testl(shifter, Immediate(32));
4895 __ j(kEqual, &done);
4896 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4897 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4898 __ Bind(&done);
4899 }
4900
GenerateUShrLong(const Location & loc,int shift)4901 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4902 Register low = loc.AsRegisterPairLow<Register>();
4903 Register high = loc.AsRegisterPairHigh<Register>();
4904 if (shift == 32) {
4905 // Shift by 32 is easy. Low gets high, and high gets 0.
4906 codegen_->EmitParallelMoves(
4907 loc.ToHigh(),
4908 loc.ToLow(),
4909 DataType::Type::kInt32,
4910 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4911 loc.ToHigh(),
4912 DataType::Type::kInt32);
4913 } else if (shift > 32) {
4914 // Low part is high >> (shift - 32). High part becomes 0.
4915 __ movl(low, high);
4916 __ shrl(low, Immediate(shift - 32));
4917 __ xorl(high, high);
4918 } else {
4919 // Between 1 and 31.
4920 __ shrd(low, high, Immediate(shift));
4921 __ shrl(high, Immediate(shift));
4922 }
4923 }
4924
GenerateUShrLong(const Location & loc,Register shifter)4925 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4926 NearLabel done;
4927 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4928 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4929 __ testl(shifter, Immediate(32));
4930 __ j(kEqual, &done);
4931 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4932 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4933 __ Bind(&done);
4934 }
4935
VisitRor(HRor * ror)4936 void LocationsBuilderX86::VisitRor(HRor* ror) {
4937 LocationSummary* locations =
4938 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4939
4940 switch (ror->GetResultType()) {
4941 case DataType::Type::kInt64:
4942 // Add the temporary needed.
4943 locations->AddTemp(Location::RequiresRegister());
4944 FALLTHROUGH_INTENDED;
4945 case DataType::Type::kInt32:
4946 locations->SetInAt(0, Location::RequiresRegister());
4947 // The shift count needs to be in CL (unless it is a constant).
4948 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4949 locations->SetOut(Location::SameAsFirstInput());
4950 break;
4951 default:
4952 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4953 UNREACHABLE();
4954 }
4955 }
4956
VisitRor(HRor * ror)4957 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4958 LocationSummary* locations = ror->GetLocations();
4959 Location first = locations->InAt(0);
4960 Location second = locations->InAt(1);
4961
4962 if (ror->GetResultType() == DataType::Type::kInt32) {
4963 Register first_reg = first.AsRegister<Register>();
4964 if (second.IsRegister()) {
4965 Register second_reg = second.AsRegister<Register>();
4966 __ rorl(first_reg, second_reg);
4967 } else {
4968 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4969 __ rorl(first_reg, imm);
4970 }
4971 return;
4972 }
4973
4974 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4975 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4976 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4977 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4978 if (second.IsRegister()) {
4979 Register second_reg = second.AsRegister<Register>();
4980 DCHECK_EQ(second_reg, ECX);
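// Double-shift both halves right by the count (taken mod 32 by the hardware), then swap the
// halves when bit 5 of the count is set so that rotations of 32..63 bits come out right.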
4981 __ movl(temp_reg, first_reg_hi);
4982 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4983 __ shrd(first_reg_lo, temp_reg, second_reg);
4984 __ movl(temp_reg, first_reg_hi);
4985 __ testl(second_reg, Immediate(32));
4986 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4987 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4988 } else {
4989 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4990 if (shift_amt == 0) {
4991 // Already fine.
4992 return;
4993 }
4994 if (shift_amt == 32) {
4995 // Just swap.
4996 __ movl(temp_reg, first_reg_lo);
4997 __ movl(first_reg_lo, first_reg_hi);
4998 __ movl(first_reg_hi, temp_reg);
4999 return;
5000 }
5001
5002 Immediate imm(shift_amt);
5003     // Save the contents of the low value.
5004 __ movl(temp_reg, first_reg_lo);
5005
5006 // Shift right into low, feeding bits from high.
5007 __ shrd(first_reg_lo, first_reg_hi, imm);
5008
5009 // Shift right into high, feeding bits from the original low.
5010 __ shrd(first_reg_hi, temp_reg, imm);
5011
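// For shift amounts in (32, 63) the 5-bit hardware count has effectively rotated by
// shift_amt - 32; exchanging the halves (a rotation by another 32) completes the rotation.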
5012 // Swap if needed.
5013 if (shift_amt > 32) {
5014 __ movl(temp_reg, first_reg_lo);
5015 __ movl(first_reg_lo, first_reg_hi);
5016 __ movl(first_reg_hi, temp_reg);
5017 }
5018 }
5019 }
5020
VisitShl(HShl * shl)5021 void LocationsBuilderX86::VisitShl(HShl* shl) {
5022 HandleShift(shl);
5023 }
5024
VisitShl(HShl * shl)5025 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5026 HandleShift(shl);
5027 }
5028
VisitShr(HShr * shr)5029 void LocationsBuilderX86::VisitShr(HShr* shr) {
5030 HandleShift(shr);
5031 }
5032
VisitShr(HShr * shr)5033 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5034 HandleShift(shr);
5035 }
5036
VisitUShr(HUShr * ushr)5037 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5038 HandleShift(ushr);
5039 }
5040
VisitUShr(HUShr * ushr)5041 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5042 HandleShift(ushr);
5043 }
5044
VisitNewInstance(HNewInstance * instruction)5045 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5046 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5047 instruction, LocationSummary::kCallOnMainOnly);
5048 locations->SetOut(Location::RegisterLocation(EAX));
5049 InvokeRuntimeCallingConvention calling_convention;
5050 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5051 }
5052
VisitNewInstance(HNewInstance * instruction)5053 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5054 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5055 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5056 DCHECK(!codegen_->IsLeafMethod());
5057 }
5058
VisitNewArray(HNewArray * instruction)5059 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5060 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5061 instruction, LocationSummary::kCallOnMainOnly);
5062 locations->SetOut(Location::RegisterLocation(EAX));
5063 InvokeRuntimeCallingConvention calling_convention;
5064 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5065 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5066 }
5067
VisitNewArray(HNewArray * instruction)5068 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5069 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5070 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5071 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5072 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5073 DCHECK(!codegen_->IsLeafMethod());
5074 }
5075
VisitParameterValue(HParameterValue * instruction)5076 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5077 LocationSummary* locations =
5078 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5079 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5080 if (location.IsStackSlot()) {
5081 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5082 } else if (location.IsDoubleStackSlot()) {
5083 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5084 }
5085 locations->SetOut(location);
5086 }
5087
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)5088 void InstructionCodeGeneratorX86::VisitParameterValue(
5089 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5090 }
5091
VisitCurrentMethod(HCurrentMethod * instruction)5092 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5093 LocationSummary* locations =
5094 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5095 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5096 }
5097
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)5098 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5099 }
5100
VisitClassTableGet(HClassTableGet * instruction)5101 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5102 LocationSummary* locations =
5103 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5104 locations->SetInAt(0, Location::RequiresRegister());
5105 locations->SetOut(Location::RequiresRegister());
5106 }
5107
VisitClassTableGet(HClassTableGet * instruction)5108 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5109 LocationSummary* locations = instruction->GetLocations();
5110 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5111 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5112 instruction->GetIndex(), kX86PointerSize).SizeValue();
5113 __ movl(locations->Out().AsRegister<Register>(),
5114 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5115 } else {
5116 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5117 instruction->GetIndex(), kX86PointerSize));
5118 __ movl(locations->Out().AsRegister<Register>(),
5119 Address(locations->InAt(0).AsRegister<Register>(),
5120 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5121     // out = out->GetImtEntryAt(method_offset);
5122 __ movl(locations->Out().AsRegister<Register>(),
5123 Address(locations->Out().AsRegister<Register>(), method_offset));
5124 }
5125 }
5126
VisitNot(HNot * not_)5127 void LocationsBuilderX86::VisitNot(HNot* not_) {
5128 LocationSummary* locations =
5129 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5130 locations->SetInAt(0, Location::RequiresRegister());
5131 locations->SetOut(Location::SameAsFirstInput());
5132 }
5133
VisitNot(HNot * not_)5134 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5135 LocationSummary* locations = not_->GetLocations();
5136 Location in = locations->InAt(0);
5137 Location out = locations->Out();
5138 DCHECK(in.Equals(out));
5139 switch (not_->GetResultType()) {
5140 case DataType::Type::kInt32:
5141 __ notl(out.AsRegister<Register>());
5142 break;
5143
5144 case DataType::Type::kInt64:
5145 __ notl(out.AsRegisterPairLow<Register>());
5146 __ notl(out.AsRegisterPairHigh<Register>());
5147 break;
5148
5149 default:
5150 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5151 }
5152 }
5153
VisitBooleanNot(HBooleanNot * bool_not)5154 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5155 LocationSummary* locations =
5156 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5157 locations->SetInAt(0, Location::RequiresRegister());
5158 locations->SetOut(Location::SameAsFirstInput());
5159 }
5160
VisitBooleanNot(HBooleanNot * bool_not)5161 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5162 LocationSummary* locations = bool_not->GetLocations();
5163 Location in = locations->InAt(0);
5164 Location out = locations->Out();
5165 DCHECK(in.Equals(out));
5166 __ xorl(out.AsRegister<Register>(), Immediate(1));
5167 }
5168
VisitCompare(HCompare * compare)5169 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5170 LocationSummary* locations =
5171 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5172 switch (compare->InputAt(0)->GetType()) {
5173 case DataType::Type::kBool:
5174 case DataType::Type::kUint8:
5175 case DataType::Type::kInt8:
5176 case DataType::Type::kUint16:
5177 case DataType::Type::kInt16:
5178 case DataType::Type::kInt32:
5179 case DataType::Type::kInt64: {
5180 locations->SetInAt(0, Location::RequiresRegister());
5181 locations->SetInAt(1, Location::Any());
5182 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5183 break;
5184 }
5185 case DataType::Type::kFloat32:
5186 case DataType::Type::kFloat64: {
5187 locations->SetInAt(0, Location::RequiresFpuRegister());
5188 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5189 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5190 } else if (compare->InputAt(1)->IsConstant()) {
5191 locations->SetInAt(1, Location::RequiresFpuRegister());
5192 } else {
5193 locations->SetInAt(1, Location::Any());
5194 }
5195 locations->SetOut(Location::RequiresRegister());
5196 break;
5197 }
5198 default:
5199 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5200 }
5201 }
5202
VisitCompare(HCompare * compare)5203 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5204 LocationSummary* locations = compare->GetLocations();
5205 Register out = locations->Out().AsRegister<Register>();
5206 Location left = locations->InAt(0);
5207 Location right = locations->InAt(1);
5208
5209 NearLabel less, greater, done;
5210 Condition less_cond = kLess;
5211
5212 switch (compare->InputAt(0)->GetType()) {
5213 case DataType::Type::kBool:
5214 case DataType::Type::kUint8:
5215 case DataType::Type::kInt8:
5216 case DataType::Type::kUint16:
5217 case DataType::Type::kInt16:
5218 case DataType::Type::kInt32: {
5219 codegen_->GenerateIntCompare(left, right);
5220 break;
5221 }
5222 case DataType::Type::kInt64: {
5223 Register left_low = left.AsRegisterPairLow<Register>();
5224 Register left_high = left.AsRegisterPairHigh<Register>();
5225 int32_t val_low = 0;
5226 int32_t val_high = 0;
5227 bool right_is_const = false;
5228
5229 if (right.IsConstant()) {
5230 DCHECK(right.GetConstant()->IsLongConstant());
5231 right_is_const = true;
5232 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5233 val_low = Low32Bits(val);
5234 val_high = High32Bits(val);
5235 }
5236
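// Compare the high words first with a signed comparison; only if they are equal does the
// unsigned comparison of the low words decide the result (hence `less_cond = kBelow` later).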
5237 if (right.IsRegisterPair()) {
5238 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5239 } else if (right.IsDoubleStackSlot()) {
5240 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5241 } else {
5242 DCHECK(right_is_const) << right;
5243 codegen_->Compare32BitValue(left_high, val_high);
5244 }
5245 __ j(kLess, &less); // Signed compare.
5246 __ j(kGreater, &greater); // Signed compare.
5247 if (right.IsRegisterPair()) {
5248 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5249 } else if (right.IsDoubleStackSlot()) {
5250 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5251 } else {
5252 DCHECK(right_is_const) << right;
5253 codegen_->Compare32BitValue(left_low, val_low);
5254 }
5255 less_cond = kBelow; // for CF (unsigned).
5256 break;
5257 }
5258 case DataType::Type::kFloat32: {
5259 GenerateFPCompare(left, right, compare, false);
5260 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5261 less_cond = kBelow; // for CF (floats).
5262 break;
5263 }
5264 case DataType::Type::kFloat64: {
5265 GenerateFPCompare(left, right, compare, true);
5266 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5267 less_cond = kBelow; // for CF (floats).
5268 break;
5269 }
5270 default:
5271 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5272 }
5273
5274 __ movl(out, Immediate(0));
5275 __ j(kEqual, &done);
5276 __ j(less_cond, &less);
5277
5278 __ Bind(&greater);
5279 __ movl(out, Immediate(1));
5280 __ jmp(&done);
5281
5282 __ Bind(&less);
5283 __ movl(out, Immediate(-1));
5284
5285 __ Bind(&done);
5286 }
5287
VisitPhi(HPhi * instruction)5288 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5289 LocationSummary* locations =
5290 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5291 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5292 locations->SetInAt(i, Location::Any());
5293 }
5294 locations->SetOut(Location::Any());
5295 }
5296
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)5297 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5298 LOG(FATAL) << "Unreachable";
5299 }
5300
GenerateMemoryBarrier(MemBarrierKind kind)5301 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5302 /*
5303 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
5304 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5305 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5306 */
5307 switch (kind) {
5308 case MemBarrierKind::kAnyAny: {
5309 MemoryFence();
5310 break;
5311 }
5312 case MemBarrierKind::kAnyStore:
5313 case MemBarrierKind::kLoadAny:
5314 case MemBarrierKind::kStoreStore: {
5315 // nop
5316 break;
5317 }
5318 case MemBarrierKind::kNTStoreStore:
5319 // Non-Temporal Store/Store needs an explicit fence.
5320 MemoryFence(/* non-temporal= */ true);
5321 break;
5322 }
5323 }
5324
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method ATTRIBUTE_UNUSED)5325 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5326 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5327 ArtMethod* method ATTRIBUTE_UNUSED) {
5328 return desired_dispatch_info;
5329 }
5330
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5331 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5332 if (invoke->IsInvokeStaticOrDirect()) {
5333 return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5334 }
5335 DCHECK(invoke->IsInvokeInterface());
5336 Location location =
5337 invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5338 return location.AsRegister<Register>();
5339 }
5340
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5341 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5342 Register temp) {
5343 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5344 if (!invoke->GetLocations()->Intrinsified()) {
5345 return location.AsRegister<Register>();
5346 }
5347 // For intrinsics we allow any location, so it may be on the stack.
5348 if (!location.IsRegister()) {
5349 __ movl(temp, Address(ESP, location.GetStackIndex()));
5350 return temp;
5351 }
5352 // For register locations, check if the register was saved. If so, get it from the stack.
5353 // Note: There is a chance that the register was saved but not overwritten, so we could
5354 // save one load. However, since this is just an intrinsic slow path we prefer this
5355   // simple and more robust approach rather than trying to determine if that's the case.
5356 SlowPathCode* slow_path = GetCurrentSlowPath();
5357 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5358 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5359 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5360 __ movl(temp, Address(ESP, stack_offset));
5361 return temp;
5362 }
5363 return location.AsRegister<Register>();
5364 }
5365
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5366 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5367 switch (load_kind) {
5368 case MethodLoadKind::kBootImageLinkTimePcRelative: {
5369 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5370 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5371 __ leal(temp.AsRegister<Register>(),
5372 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5373 RecordBootImageMethodPatch(invoke);
5374 break;
5375 }
5376 case MethodLoadKind::kBootImageRelRo: {
5377 size_t index = invoke->IsInvokeInterface()
5378 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5379 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5380 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5381 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5382 RecordBootImageRelRoPatch(
5383 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5384 GetBootImageOffset(invoke));
5385 break;
5386 }
5387 case MethodLoadKind::kBssEntry: {
5388 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5389 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5390 RecordMethodBssEntryPatch(invoke);
5391 // No need for memory fence, thanks to the x86 memory model.
5392 break;
5393 }
5394 case MethodLoadKind::kJitDirectAddress: {
5395 __ movl(temp.AsRegister<Register>(),
5396 Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5397 break;
5398 }
5399 case MethodLoadKind::kRuntimeCall: {
5400 // Test situation, don't do anything.
5401 break;
5402 }
5403 default: {
5404 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5405 UNREACHABLE();
5406 }
5407 }
5408 }
5409
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5410 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5411 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5412 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5413 switch (invoke->GetMethodLoadKind()) {
5414 case MethodLoadKind::kStringInit: {
5415 // temp = thread->string_init_entrypoint
5416 uint32_t offset =
5417 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5418 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5419 break;
5420 }
5421 case MethodLoadKind::kRecursive: {
5422 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5423 break;
5424 }
5425 case MethodLoadKind::kRuntimeCall: {
5426 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5427 return; // No code pointer retrieval; the runtime performs the call directly.
5428 }
5429 case MethodLoadKind::kBootImageLinkTimePcRelative:
5430 // For kCallCriticalNative we skip loading the method and do the call directly.
5431 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5432 break;
5433 }
5434 FALLTHROUGH_INTENDED;
5435 default: {
5436 LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5437 }
5438 }
5439
5440 switch (invoke->GetCodePtrLocation()) {
5441 case CodePtrLocation::kCallSelf:
5442 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5443 __ call(GetFrameEntryLabel());
5444 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5445 break;
5446 case CodePtrLocation::kCallCriticalNative: {
5447 size_t out_frame_size =
5448 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5449 kNativeStackAlignment,
5450 GetCriticalNativeDirectCallFrameSize>(invoke);
5451 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5452 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5453 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5454 __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5455 RecordBootImageJniEntrypointPatch(invoke);
5456 } else {
5457 // (callee_method + offset_of_jni_entry_point)()
5458 __ call(Address(callee_method.AsRegister<Register>(),
5459 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5460 }
5461 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5462 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5463 // Create space for conversion.
5464 out_frame_size = 8u;
5465 IncreaseFrame(out_frame_size);
5466 }
5467 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
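// On x86-32 the native ABI returns float/double in x87 register ST0, while the managed ABI
// expects XMM0, so the value is stored to the stack with fstp and reloaded into XMM0.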
5468 switch (invoke->GetType()) {
5469 case DataType::Type::kBool:
5470 __ movzxb(EAX, AL);
5471 break;
5472 case DataType::Type::kInt8:
5473 __ movsxb(EAX, AL);
5474 break;
5475 case DataType::Type::kUint16:
5476 __ movzxw(EAX, EAX);
5477 break;
5478 case DataType::Type::kInt16:
5479 __ movsxw(EAX, EAX);
5480 break;
5481 case DataType::Type::kFloat32:
5482 __ fstps(Address(ESP, 0));
5483 __ movss(XMM0, Address(ESP, 0));
5484 break;
5485 case DataType::Type::kFloat64:
5486 __ fstpl(Address(ESP, 0));
5487 __ movsd(XMM0, Address(ESP, 0));
5488 break;
5489 case DataType::Type::kInt32:
5490 case DataType::Type::kInt64:
5491 case DataType::Type::kVoid:
5492 break;
5493 default:
5494 DCHECK(false) << invoke->GetType();
5495 break;
5496 }
5497 if (out_frame_size != 0u) {
5498 DecreaseFrame(out_frame_size);
5499 }
5500 break;
5501 }
5502 case CodePtrLocation::kCallArtMethod:
5503 // (callee_method + offset_of_quick_compiled_code)()
5504 __ call(Address(callee_method.AsRegister<Register>(),
5505 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5506 kX86PointerSize).Int32Value()));
5507 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5508 break;
5509 }
5510
5511 DCHECK(!IsLeafMethod());
5512 }
5513
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5514 void CodeGeneratorX86::GenerateVirtualCall(
5515 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5516 Register temp = temp_in.AsRegister<Register>();
5517 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5518 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5519
5520 // Use the calling convention instead of the location of the receiver, as
5521 // intrinsics may have put the receiver in a different register. In the intrinsics
5522 // slow path, the arguments have been moved to the right place, so here we are
5523 // guaranteed that the receiver is the first register of the calling convention.
5524 InvokeDexCallingConvention calling_convention;
5525 Register receiver = calling_convention.GetRegisterAt(0);
5526 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5527 // /* HeapReference<Class> */ temp = receiver->klass_
5528 __ movl(temp, Address(receiver, class_offset));
5529 MaybeRecordImplicitNullCheck(invoke);
5530 // Instead of simply (possibly) unpoisoning `temp` here, we should
5531 // emit a read barrier for the previous class reference load.
5532 // However this is not required in practice, as this is an
5533 // intermediate/temporary reference and because the current
5534 // concurrent copying collector keeps the from-space memory
5535 // intact/accessible until the end of the marking phase (the
5536   // concurrent copying collector may not do so in the future).
5537 __ MaybeUnpoisonHeapReference(temp);
5538
5539 MaybeGenerateInlineCacheCheck(invoke, temp);
5540
5541 // temp = temp->GetMethodAt(method_offset);
5542 __ movl(temp, Address(temp, method_offset));
5543 // call temp->GetEntryPoint();
5544 __ call(Address(
5545 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5546 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5547 }
5548
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5549 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5550 uint32_t intrinsic_data) {
5551 boot_image_other_patches_.emplace_back(
5552 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5553 __ Bind(&boot_image_other_patches_.back().label);
5554 }
5555
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5556 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5557 uint32_t boot_image_offset) {
5558 boot_image_other_patches_.emplace_back(
5559 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5560 __ Bind(&boot_image_other_patches_.back().label);
5561 }
5562
RecordBootImageMethodPatch(HInvoke * invoke)5563 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5564 size_t index = invoke->IsInvokeInterface()
5565 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5566 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5567 HX86ComputeBaseMethodAddress* method_address =
5568 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5569 boot_image_method_patches_.emplace_back(
5570 method_address,
5571 invoke->GetResolvedMethodReference().dex_file,
5572 invoke->GetResolvedMethodReference().index);
5573 __ Bind(&boot_image_method_patches_.back().label);
5574 }
5575
RecordMethodBssEntryPatch(HInvoke * invoke)5576 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5577 size_t index = invoke->IsInvokeInterface()
5578 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5579 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5580 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5581 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5582 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5583 invoke->GetMethodReference().dex_file));
5584 HX86ComputeBaseMethodAddress* method_address =
5585 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5586 // Add the patch entry and bind its label at the end of the instruction.
5587 method_bss_entry_patches_.emplace_back(
5588 method_address,
5589 invoke->GetMethodReference().dex_file,
5590 invoke->GetMethodReference().index);
5591 __ Bind(&method_bss_entry_patches_.back().label);
5592 }
5593
RecordBootImageTypePatch(HLoadClass * load_class)5594 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5595 HX86ComputeBaseMethodAddress* method_address =
5596 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5597 boot_image_type_patches_.emplace_back(
5598 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5599 __ Bind(&boot_image_type_patches_.back().label);
5600 }
5601
NewTypeBssEntryPatch(HLoadClass * load_class)5602 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5603 HX86ComputeBaseMethodAddress* method_address =
5604 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5605 ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5606 switch (load_class->GetLoadKind()) {
5607 case HLoadClass::LoadKind::kBssEntry:
5608 patches = &type_bss_entry_patches_;
5609 break;
5610 case HLoadClass::LoadKind::kBssEntryPublic:
5611 patches = &public_type_bss_entry_patches_;
5612 break;
5613 case HLoadClass::LoadKind::kBssEntryPackage:
5614 patches = &package_type_bss_entry_patches_;
5615 break;
5616 default:
5617 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5618 UNREACHABLE();
5619 }
5620 patches->emplace_back(
5621 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5622 return &patches->back().label;
5623 }
5624
RecordBootImageStringPatch(HLoadString * load_string)5625 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5626 HX86ComputeBaseMethodAddress* method_address =
5627 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5628 boot_image_string_patches_.emplace_back(
5629 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5630 __ Bind(&boot_image_string_patches_.back().label);
5631 }
5632
5633 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5634 HX86ComputeBaseMethodAddress* method_address =
5635 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5636 string_bss_entry_patches_.emplace_back(
5637 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5638 return &string_bss_entry_patches_.back().label;
5639 }
5640
5641 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5642 HX86ComputeBaseMethodAddress* method_address =
5643 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5644 boot_image_jni_entrypoint_patches_.emplace_back(
5645 method_address,
5646 invoke->GetResolvedMethodReference().dex_file,
5647 invoke->GetResolvedMethodReference().index);
5648 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5649 }
5650
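// Loads a 32-bit reference to a boot image object into `reg`: a link-time patched `leal`
// when compiling the boot image itself, a load from a boot image relocation entry (see
// RecordBootImageRelRoPatch) for other PIC compilation, or a direct immediate under the
// JIT, where the boot image is already mapped.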
5651 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5652 uint32_t boot_image_reference,
5653 HInvokeStaticOrDirect* invoke) {
5654 if (GetCompilerOptions().IsBootImage()) {
5655 HX86ComputeBaseMethodAddress* method_address =
5656 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5657 DCHECK(method_address != nullptr);
5658 Register method_address_reg =
5659 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5660 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5661 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5662 } else if (GetCompilerOptions().GetCompilePic()) {
5663 HX86ComputeBaseMethodAddress* method_address =
5664 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5665 DCHECK(method_address != nullptr);
5666 Register method_address_reg =
5667 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5668 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5669 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5670 } else {
5671 DCHECK(GetCompilerOptions().IsJitCompiler());
5672 gc::Heap* heap = Runtime::Current()->GetHeap();
5673 DCHECK(!heap->GetBootImageSpaces().empty());
5674 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5675 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5676 }
5677 }
5678
5679 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5680 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5681 if (GetCompilerOptions().IsBootImage()) {
5682 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5683 HX86ComputeBaseMethodAddress* method_address =
5684 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5685 DCHECK(method_address != nullptr);
5686 Register method_address_reg =
5687 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5688 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5689 MethodReference target_method = invoke->GetResolvedMethodReference();
5690 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5691 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5692 __ Bind(&boot_image_type_patches_.back().label);
5693 } else {
5694 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5695 LoadBootImageAddress(reg, boot_image_offset, invoke);
5696 }
5697 }
5698
5699 // The label points to the end of the "movl" (or another) instruction, but the literal
5700 // offset for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
5701 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5702
5703 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5704 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5705 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5706 ArenaVector<linker::LinkerPatch>* linker_patches) {
5707 for (const X86PcRelativePatchInfo& info : infos) {
5708 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5709 linker_patches->push_back(Factory(literal_offset,
5710 info.target_dex_file,
5711 GetMethodAddressOffset(info.method_address),
5712 info.offset_or_index));
5713 }
5714 }
5715
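// Adapter that lets EmitPcRelativeLinkerPatches() above use LinkerPatch factories that take
// no target dex file (e.g. intrinsic reference and boot image RelRo patches): the unused dex
// file pointer is checked to be null and dropped.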
5716 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5717 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5718 const DexFile* target_dex_file,
5719 uint32_t pc_insn_offset,
5720 uint32_t boot_image_offset) {
5721 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5722 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5723 }
5724
5725 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5726 DCHECK(linker_patches->empty());
5727 size_t size =
5728 boot_image_method_patches_.size() +
5729 method_bss_entry_patches_.size() +
5730 boot_image_type_patches_.size() +
5731 type_bss_entry_patches_.size() +
5732 public_type_bss_entry_patches_.size() +
5733 package_type_bss_entry_patches_.size() +
5734 boot_image_string_patches_.size() +
5735 string_bss_entry_patches_.size() +
5736 boot_image_jni_entrypoint_patches_.size() +
5737 boot_image_other_patches_.size();
5738 linker_patches->reserve(size);
5739 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5740 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5741 boot_image_method_patches_, linker_patches);
5742 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5743 boot_image_type_patches_, linker_patches);
5744 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5745 boot_image_string_patches_, linker_patches);
5746 } else {
5747 DCHECK(boot_image_method_patches_.empty());
5748 DCHECK(boot_image_type_patches_.empty());
5749 DCHECK(boot_image_string_patches_.empty());
5750 }
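  // boot_image_other_patches_ holds intrinsic references when compiling the boot image and
  // boot image relocation (RelRo) offsets otherwise, so pick the matching patch type.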
5751 if (GetCompilerOptions().IsBootImage()) {
5752 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5753 boot_image_other_patches_, linker_patches);
5754 } else {
5755 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5756 boot_image_other_patches_, linker_patches);
5757 }
5758 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5759 method_bss_entry_patches_, linker_patches);
5760 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5761 type_bss_entry_patches_, linker_patches);
5762 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5763 public_type_bss_entry_patches_, linker_patches);
5764 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5765 package_type_bss_entry_patches_, linker_patches);
5766 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5767 string_bss_entry_patches_, linker_patches);
5768 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5769 boot_image_jni_entrypoint_patches_, linker_patches);
5770 DCHECK_EQ(size, linker_patches->size());
5771 }
5772
5773 void CodeGeneratorX86::MarkGCCard(
5774 Register temp, Register card, Register object, Register value, bool emit_null_check) {
5775 NearLabel is_null;
5776 if (emit_null_check) {
5777 __ testl(value, value);
5778 __ j(kEqual, &is_null);
5779 }
5780 // Load the address of the card table into `card`.
5781 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5782 // Calculate the offset (in the card table) of the card corresponding to
5783 // `object`.
5784 __ movl(temp, object);
5785 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5786 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5787 // `object`'s card.
5788 //
5789 // Register `card` contains the address of the card table. Note that the card
5790 // table's base is biased during its creation so that it always starts at an
5791 // address whose least-significant byte is equal to `kCardDirty` (see
5792 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5793 // below writes the `kCardDirty` (byte) value into the `object`'s card
5794 // (located at `card + object >> kCardShift`).
5795 //
5796 // This dual use of the value in register `card` (1. to calculate the location
5797 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5798 // (no need to explicitly load `kCardDirty` as an immediate value).
5799 __ movb(Address(temp, card, TIMES_1, 0),
5800 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5801 if (emit_null_check) {
5802 __ Bind(&is_null);
5803 }
5804 }
5805
5806 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5807 DCHECK(instruction->IsInstanceFieldGet() ||
5808 instruction->IsStaticFieldGet() ||
5809 instruction->IsPredicatedInstanceFieldGet());
5810
5811 bool object_field_get_with_read_barrier =
5812 gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5813 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5814 LocationSummary* locations =
5815 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5816 gUseReadBarrier
5817 ? LocationSummary::kCallOnSlowPath
5818 : LocationSummary::kNoCall);
5819 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5820 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5821 }
5822 // receiver_input
5823 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5824 if (is_predicated) {
5825 if (DataType::IsFloatingPointType(instruction->GetType())) {
5826 locations->SetInAt(0, Location::RequiresFpuRegister());
5827 } else {
5828 locations->SetInAt(0, Location::RequiresRegister());
5829 }
5830 }
5831 if (DataType::IsFloatingPointType(instruction->GetType())) {
5832 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5833 : Location::RequiresFpuRegister());
5834 } else {
5835 // The output overlaps in case of long: we don't want the low move
5836 // to overwrite the object's location. Likewise, in the case of
5837 // an object field get with read barriers enabled, we do not want
5838 // the move to overwrite the object's location, as we need it to emit
5839 // the read barrier.
5840 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5841 (object_field_get_with_read_barrier ||
5842 instruction->GetType() == DataType::Type::kInt64 ||
5843 is_predicated)
5844 ? Location::kOutputOverlap
5845 : Location::kNoOutputOverlap);
5846 }
5847
5848 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5849 // Long values can be loaded atomically into an XMM using movsd.
5850 // So we use an XMM register as a temp to achieve atomicity (first
5851 // load the temp into the XMM and then copy the XMM into the
5852 // output, 32 bits at a time).
5853 locations->AddTemp(Location::RequiresFpuRegister());
5854 }
5855 }
5856
5857 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5858 const FieldInfo& field_info) {
5859 DCHECK(instruction->IsInstanceFieldGet() ||
5860 instruction->IsStaticFieldGet() ||
5861 instruction->IsPredicatedInstanceFieldGet());
5862
5863 LocationSummary* locations = instruction->GetLocations();
5864 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5865 Register base = base_loc.AsRegister<Register>();
5866 Location out = locations->Out();
5867 bool is_volatile = field_info.IsVolatile();
5868 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5869 DataType::Type load_type = instruction->GetType();
5870 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5871
5872 if (load_type == DataType::Type::kReference) {
5873 // /* HeapReference<Object> */ out = *(base + offset)
5874 if (gUseReadBarrier && kUseBakerReadBarrier) {
5875 // Note that a potential implicit null check is handled in this
5876 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5877 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5878 instruction, out, base, offset, /* needs_null_check= */ true);
5879 if (is_volatile) {
5880 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5881 }
5882 } else {
5883 __ movl(out.AsRegister<Register>(), Address(base, offset));
5884 codegen_->MaybeRecordImplicitNullCheck(instruction);
5885 if (is_volatile) {
5886 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5887 }
5888 // If read barriers are enabled, emit read barriers other than
5889 // Baker's using a slow path (and also unpoison the loaded
5890 // reference, if heap poisoning is enabled).
5891 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5892 }
5893 } else {
5894 Address src(base, offset);
5895 XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
5896 ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
5897 : kNoXmmRegister;
5898 codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
5899 if (is_volatile) {
5900 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5901 }
5902 }
5903 }
5904
5905 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
5906 const FieldInfo& field_info,
5907 WriteBarrierKind write_barrier_kind) {
5908 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5909
5910 LocationSummary* locations =
5911 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5912 locations->SetInAt(0, Location::RequiresRegister());
5913 bool is_volatile = field_info.IsVolatile();
5914 DataType::Type field_type = field_info.GetFieldType();
5915 bool is_byte_type = DataType::Size(field_type) == 1u;
5916
5917 // The register allocator does not support multiple
5918 // inputs that die at entry with one in a specific register.
5919 if (is_byte_type) {
5920 // Ensure the value is in a byte register.
5921 locations->SetInAt(1, Location::RegisterLocation(EAX));
5922 } else if (DataType::IsFloatingPointType(field_type)) {
5923 if (is_volatile && field_type == DataType::Type::kFloat64) {
5924 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5925 locations->SetInAt(1, Location::RequiresFpuRegister());
5926 } else {
5927 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5928 }
5929 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5930 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5931 locations->SetInAt(1, Location::RequiresRegister());
5932
5933     // A 64-bit value can be written atomically to an address with movsd and an XMM register.
5934 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5935 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5936 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5937 // isolated cases when we need this it isn't worth adding the extra complexity.
5938 locations->AddTemp(Location::RequiresFpuRegister());
5939 locations->AddTemp(Location::RequiresFpuRegister());
5940 } else {
5941 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5942
5943 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5944 if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
5945 locations->AddTemp(Location::RequiresRegister());
5946 // Ensure the card is in a byte register.
5947 locations->AddTemp(Location::RegisterLocation(ECX));
5948 } else if (kPoisonHeapReferences) {
5949 locations->AddTemp(Location::RequiresRegister());
5950 }
5951 }
5952 }
5953 }
5954
5955 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5956 uint32_t value_index,
5957 DataType::Type field_type,
5958 Address field_addr,
5959 Register base,
5960 bool is_volatile,
5961 bool value_can_be_null,
5962 WriteBarrierKind write_barrier_kind) {
5963 LocationSummary* locations = instruction->GetLocations();
5964 Location value = locations->InAt(value_index);
5965 bool needs_write_barrier =
5966 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index));
5967
5968 if (is_volatile) {
5969 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5970 }
5971
5972 bool maybe_record_implicit_null_check_done = false;
5973
5974 switch (field_type) {
5975 case DataType::Type::kBool:
5976 case DataType::Type::kUint8:
5977 case DataType::Type::kInt8: {
5978 if (value.IsConstant()) {
5979 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5980 } else {
5981 __ movb(field_addr, value.AsRegister<ByteRegister>());
5982 }
5983 break;
5984 }
5985
5986 case DataType::Type::kUint16:
5987 case DataType::Type::kInt16: {
5988 if (value.IsConstant()) {
5989 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5990 } else {
5991 __ movw(field_addr, value.AsRegister<Register>());
5992 }
5993 break;
5994 }
5995
5996 case DataType::Type::kInt32:
5997 case DataType::Type::kReference: {
5998 if (kPoisonHeapReferences && needs_write_barrier) {
5999 // Note that in the case where `value` is a null reference,
6000 // we do not enter this block, as the reference does not
6001 // need poisoning.
6002 DCHECK_EQ(field_type, DataType::Type::kReference);
6003 Register temp = locations->GetTemp(0).AsRegister<Register>();
6004 __ movl(temp, value.AsRegister<Register>());
6005 __ PoisonHeapReference(temp);
6006 __ movl(field_addr, temp);
6007 } else if (value.IsConstant()) {
6008 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6009 __ movl(field_addr, Immediate(v));
6010 } else {
6011 DCHECK(value.IsRegister()) << value;
6012 __ movl(field_addr, value.AsRegister<Register>());
6013 }
6014 break;
6015 }
6016
6017 case DataType::Type::kInt64: {
6018 if (is_volatile) {
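        // Combine the two 32-bit halves into one XMM register (movd + punpckldq) so that
        // the store below is a single atomic movsd, as required for a volatile 64-bit write.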
6019 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6020 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6021 __ movd(temp1, value.AsRegisterPairLow<Register>());
6022 __ movd(temp2, value.AsRegisterPairHigh<Register>());
6023 __ punpckldq(temp1, temp2);
6024 __ movsd(field_addr, temp1);
6025 codegen_->MaybeRecordImplicitNullCheck(instruction);
6026 } else if (value.IsConstant()) {
6027 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6028 __ movl(field_addr, Immediate(Low32Bits(v)));
6029 codegen_->MaybeRecordImplicitNullCheck(instruction);
6030 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6031 } else {
6032 __ movl(field_addr, value.AsRegisterPairLow<Register>());
6033 codegen_->MaybeRecordImplicitNullCheck(instruction);
6034 __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6035 }
6036 maybe_record_implicit_null_check_done = true;
6037 break;
6038 }
6039
6040 case DataType::Type::kFloat32: {
6041 if (value.IsConstant()) {
6042 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6043 __ movl(field_addr, Immediate(v));
6044 } else {
6045 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6046 }
6047 break;
6048 }
6049
6050 case DataType::Type::kFloat64: {
6051 if (value.IsConstant()) {
6052 DCHECK(!is_volatile);
6053 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6054 __ movl(field_addr, Immediate(Low32Bits(v)));
6055 codegen_->MaybeRecordImplicitNullCheck(instruction);
6056 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6057 maybe_record_implicit_null_check_done = true;
6058 } else {
6059 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6060 }
6061 break;
6062 }
6063
6064 case DataType::Type::kUint32:
6065 case DataType::Type::kUint64:
6066 case DataType::Type::kVoid:
6067 LOG(FATAL) << "Unreachable type " << field_type;
6068 UNREACHABLE();
6069 }
6070
6071 if (!maybe_record_implicit_null_check_done) {
6072 codegen_->MaybeRecordImplicitNullCheck(instruction);
6073 }
6074
6075 if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) {
6076 Register temp = locations->GetTemp(0).AsRegister<Register>();
6077 Register card = locations->GetTemp(1).AsRegister<Register>();
6078 codegen_->MarkGCCard(
6079 temp,
6080 card,
6081 base,
6082 value.AsRegister<Register>(),
6083 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
6084 }
6085
6086 if (is_volatile) {
6087 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6088 }
6089 }
6090
6091 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6092 const FieldInfo& field_info,
6093 bool value_can_be_null,
6094 WriteBarrierKind write_barrier_kind) {
6095 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6096
6097 LocationSummary* locations = instruction->GetLocations();
6098 Register base = locations->InAt(0).AsRegister<Register>();
6099 bool is_volatile = field_info.IsVolatile();
6100 DataType::Type field_type = field_info.GetFieldType();
6101 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6102 bool is_predicated =
6103 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
6104
6105 Address field_addr(base, offset);
6106
6107 NearLabel pred_is_null;
6108 if (is_predicated) {
6109 __ testl(base, base);
6110 __ j(kEqual, &pred_is_null);
6111 }
6112
6113 HandleFieldSet(instruction,
6114 /* value_index= */ 1,
6115 field_type,
6116 field_addr,
6117 base,
6118 is_volatile,
6119 value_can_be_null,
6120 write_barrier_kind);
6121
6122 if (is_predicated) {
6123 __ Bind(&pred_is_null);
6124 }
6125 }
6126
6127 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6128 HandleFieldGet(instruction, instruction->GetFieldInfo());
6129 }
6130
6131 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6132 HandleFieldGet(instruction, instruction->GetFieldInfo());
6133 }
6134
6135 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6136 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6137 }
6138
6139 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6140 HandleFieldSet(instruction,
6141 instruction->GetFieldInfo(),
6142 instruction->GetValueCanBeNull(),
6143 instruction->GetWriteBarrierKind());
6144 }
6145
6146 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6147 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6148 }
6149
6150 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6151 HandleFieldSet(instruction,
6152 instruction->GetFieldInfo(),
6153 instruction->GetValueCanBeNull(),
6154 instruction->GetWriteBarrierKind());
6155 }
6156
6157 void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
6158 HPredicatedInstanceFieldGet* instruction) {
6159 HandleFieldGet(instruction, instruction->GetFieldInfo());
6160 }
6161
6162 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6163 HandleFieldGet(instruction, instruction->GetFieldInfo());
6164 }
6165
6166 void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
6167 HPredicatedInstanceFieldGet* instruction) {
6168 NearLabel finish;
6169 LocationSummary* locations = instruction->GetLocations();
6170 Register recv = locations->InAt(1).AsRegister<Register>();
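  // The load is performed only when the receiver is non-null; otherwise the output register
  // already holds the default value (it is allocated as SameAsFirstInput in HandleFieldGet's
  // location summary).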
6171 __ testl(recv, recv);
6172 __ j(kZero, &finish);
6173 HandleFieldGet(instruction, instruction->GetFieldInfo());
6174 __ Bind(&finish);
6175 }
6176 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6177 HandleFieldGet(instruction, instruction->GetFieldInfo());
6178 }
6179
6180 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6181 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6182 }
6183
6184 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6185 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6186 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6187 }
6188
6189 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6190 HUnresolvedInstanceFieldGet* instruction) {
6191 FieldAccessCallingConventionX86 calling_convention;
6192 codegen_->CreateUnresolvedFieldLocationSummary(
6193 instruction, instruction->GetFieldType(), calling_convention);
6194 }
6195
6196 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6197 HUnresolvedInstanceFieldGet* instruction) {
6198 FieldAccessCallingConventionX86 calling_convention;
6199 codegen_->GenerateUnresolvedFieldAccess(instruction,
6200 instruction->GetFieldType(),
6201 instruction->GetFieldIndex(),
6202 instruction->GetDexPc(),
6203 calling_convention);
6204 }
6205
6206 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6207 HUnresolvedInstanceFieldSet* instruction) {
6208 FieldAccessCallingConventionX86 calling_convention;
6209 codegen_->CreateUnresolvedFieldLocationSummary(
6210 instruction, instruction->GetFieldType(), calling_convention);
6211 }
6212
6213 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6214 HUnresolvedInstanceFieldSet* instruction) {
6215 FieldAccessCallingConventionX86 calling_convention;
6216 codegen_->GenerateUnresolvedFieldAccess(instruction,
6217 instruction->GetFieldType(),
6218 instruction->GetFieldIndex(),
6219 instruction->GetDexPc(),
6220 calling_convention);
6221 }
6222
6223 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6224 HUnresolvedStaticFieldGet* instruction) {
6225 FieldAccessCallingConventionX86 calling_convention;
6226 codegen_->CreateUnresolvedFieldLocationSummary(
6227 instruction, instruction->GetFieldType(), calling_convention);
6228 }
6229
6230 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6231 HUnresolvedStaticFieldGet* instruction) {
6232 FieldAccessCallingConventionX86 calling_convention;
6233 codegen_->GenerateUnresolvedFieldAccess(instruction,
6234 instruction->GetFieldType(),
6235 instruction->GetFieldIndex(),
6236 instruction->GetDexPc(),
6237 calling_convention);
6238 }
6239
6240 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6241 HUnresolvedStaticFieldSet* instruction) {
6242 FieldAccessCallingConventionX86 calling_convention;
6243 codegen_->CreateUnresolvedFieldLocationSummary(
6244 instruction, instruction->GetFieldType(), calling_convention);
6245 }
6246
6247 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6248 HUnresolvedStaticFieldSet* instruction) {
6249 FieldAccessCallingConventionX86 calling_convention;
6250 codegen_->GenerateUnresolvedFieldAccess(instruction,
6251 instruction->GetFieldType(),
6252 instruction->GetFieldIndex(),
6253 instruction->GetDexPc(),
6254 calling_convention);
6255 }
6256
6257 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6258 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6259 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6260 ? Location::RequiresRegister()
6261 : Location::Any();
6262 locations->SetInAt(0, loc);
6263 }
6264
6265 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6266 if (CanMoveNullCheckToUser(instruction)) {
6267 return;
6268 }
6269 LocationSummary* locations = instruction->GetLocations();
6270 Location obj = locations->InAt(0);
6271
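  // Reading from the object's address raises SIGSEGV if `obj` is null; the fault handler turns
  // it into a NullPointerException at the PC recorded below. EAX is used only because `testl`
  // needs a register operand; its value is not modified.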
6272 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6273 RecordPcInfo(instruction, instruction->GetDexPc());
6274 }
6275
6276 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6277 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6278 AddSlowPath(slow_path);
6279
6280 LocationSummary* locations = instruction->GetLocations();
6281 Location obj = locations->InAt(0);
6282
6283 if (obj.IsRegister()) {
6284 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6285 } else if (obj.IsStackSlot()) {
6286 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6287 } else {
6288 DCHECK(obj.IsConstant()) << obj;
6289 DCHECK(obj.GetConstant()->IsNullConstant());
6290 __ jmp(slow_path->GetEntryLabel());
6291 return;
6292 }
6293 __ j(kEqual, slow_path->GetEntryLabel());
6294 }
6295
6296 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6297 codegen_->GenerateNullCheck(instruction);
6298 }
6299
6300 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6301 bool object_array_get_with_read_barrier =
6302 gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6303 LocationSummary* locations =
6304 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6305 object_array_get_with_read_barrier
6306 ? LocationSummary::kCallOnSlowPath
6307 : LocationSummary::kNoCall);
6308 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6309 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6310 }
6311 locations->SetInAt(0, Location::RequiresRegister());
6312 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6313 if (DataType::IsFloatingPointType(instruction->GetType())) {
6314 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6315 } else {
6316 // The output overlaps in case of long: we don't want the low move
6317 // to overwrite the array's location. Likewise, in the case of an
6318 // object array get with read barriers enabled, we do not want the
6319 // move to overwrite the array's location, as we need it to emit
6320 // the read barrier.
6321 locations->SetOut(
6322 Location::RequiresRegister(),
6323 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6324 ? Location::kOutputOverlap
6325 : Location::kNoOutputOverlap);
6326 }
6327 }
6328
6329 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6330 LocationSummary* locations = instruction->GetLocations();
6331 Location obj_loc = locations->InAt(0);
6332 Register obj = obj_loc.AsRegister<Register>();
6333 Location index = locations->InAt(1);
6334 Location out_loc = locations->Out();
6335 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6336
6337 DataType::Type type = instruction->GetType();
6338 if (type == DataType::Type::kReference) {
6339 static_assert(
6340 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6341 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6342 // /* HeapReference<Object> */ out =
6343 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6344 if (gUseReadBarrier && kUseBakerReadBarrier) {
6345 // Note that a potential implicit null check is handled in this
6346 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6347 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6348 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6349 } else {
6350 Register out = out_loc.AsRegister<Register>();
6351 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6352 codegen_->MaybeRecordImplicitNullCheck(instruction);
6353 // If read barriers are enabled, emit read barriers other than
6354 // Baker's using a slow path (and also unpoison the loaded
6355 // reference, if heap poisoning is enabled).
6356 if (index.IsConstant()) {
6357 uint32_t offset =
6358 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6359 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6360 } else {
6361 codegen_->MaybeGenerateReadBarrierSlow(
6362 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6363 }
6364 }
6365 } else if (type == DataType::Type::kUint16
6366 && mirror::kUseStringCompression
6367 && instruction->IsStringCharAt()) {
6368     // Branch between the compressed and uncompressed cases based on the string's compression flag.
6369 Register out = out_loc.AsRegister<Register>();
6370 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6371 NearLabel done, not_compressed;
6372 __ testb(Address(obj, count_offset), Immediate(1));
6373 codegen_->MaybeRecordImplicitNullCheck(instruction);
6374 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6375 "Expecting 0=compressed, 1=uncompressed");
6376     __ j(kNotZero, &not_compressed);
6377 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6378 __ jmp(&done);
6379     __ Bind(&not_compressed);
6380 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6381 __ Bind(&done);
6382 } else {
6383 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6384 Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6385 codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6386 }
6387 }
6388
6389 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6390 DataType::Type value_type = instruction->GetComponentType();
6391
6392 bool needs_write_barrier =
6393 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6394 bool needs_type_check = instruction->NeedsTypeCheck();
6395
6396 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6397 instruction,
6398 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6399
6400 bool is_byte_type = DataType::Size(value_type) == 1u;
6401   // We need the inputs to be different from the output in the case of a long operation.
6402 // In case of a byte operation, the register allocator does not support multiple
6403 // inputs that die at entry with one in a specific register.
6404 locations->SetInAt(0, Location::RequiresRegister());
6405 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6406 if (is_byte_type) {
6407 // Ensure the value is in a byte register.
6408 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6409 } else if (DataType::IsFloatingPointType(value_type)) {
6410 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6411 } else {
6412 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6413 }
6414 if (needs_write_barrier) {
6415 // Used by reference poisoning or emitting write barrier.
6416 locations->AddTemp(Location::RequiresRegister());
6417 if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
6418 // Only used when emitting a write barrier. Ensure the card is in a byte register.
6419 locations->AddTemp(Location::RegisterLocation(ECX));
6420 }
6421 }
6422 }
6423
6424 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6425 LocationSummary* locations = instruction->GetLocations();
6426 Location array_loc = locations->InAt(0);
6427 Register array = array_loc.AsRegister<Register>();
6428 Location index = locations->InAt(1);
6429 Location value = locations->InAt(2);
6430 DataType::Type value_type = instruction->GetComponentType();
6431 bool needs_type_check = instruction->NeedsTypeCheck();
6432 bool needs_write_barrier =
6433 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6434
6435 switch (value_type) {
6436 case DataType::Type::kBool:
6437 case DataType::Type::kUint8:
6438 case DataType::Type::kInt8: {
6439 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6440 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6441 if (value.IsRegister()) {
6442 __ movb(address, value.AsRegister<ByteRegister>());
6443 } else {
6444 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6445 }
6446 codegen_->MaybeRecordImplicitNullCheck(instruction);
6447 break;
6448 }
6449
6450 case DataType::Type::kUint16:
6451 case DataType::Type::kInt16: {
6452 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6453 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6454 if (value.IsRegister()) {
6455 __ movw(address, value.AsRegister<Register>());
6456 } else {
6457 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6458 }
6459 codegen_->MaybeRecordImplicitNullCheck(instruction);
6460 break;
6461 }
6462
6463 case DataType::Type::kReference: {
6464 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6465 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6466
6467 if (!value.IsRegister()) {
6468 // Just setting null.
6469 DCHECK(instruction->InputAt(2)->IsNullConstant());
6470 DCHECK(value.IsConstant()) << value;
6471 __ movl(address, Immediate(0));
6472 codegen_->MaybeRecordImplicitNullCheck(instruction);
6473 DCHECK(!needs_write_barrier);
6474 DCHECK(!needs_type_check);
6475 break;
6476 }
6477
6478 DCHECK(needs_write_barrier);
6479 Register register_value = value.AsRegister<Register>();
6480 Location temp_loc = locations->GetTemp(0);
6481 Register temp = temp_loc.AsRegister<Register>();
6482
6483 bool can_value_be_null = instruction->GetValueCanBeNull();
6484 NearLabel do_store;
6485 if (can_value_be_null) {
6486 __ testl(register_value, register_value);
6487 __ j(kEqual, &do_store);
6488 }
6489
6490 SlowPathCode* slow_path = nullptr;
6491 if (needs_type_check) {
6492 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6493 codegen_->AddSlowPath(slow_path);
6494
6495 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6496 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6497 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6498
6499 // Note that when Baker read barriers are enabled, the type
6500 // checks are performed without read barriers. This is fine,
6501 // even in the case where a class object is in the from-space
6502 // after the flip, as a comparison involving such a type would
6503 // not produce a false positive; it may of course produce a
6504 // false negative, in which case we would take the ArraySet
6505 // slow path.
6506
6507 // /* HeapReference<Class> */ temp = array->klass_
6508 __ movl(temp, Address(array, class_offset));
6509 codegen_->MaybeRecordImplicitNullCheck(instruction);
6510 __ MaybeUnpoisonHeapReference(temp);
6511
6512 // /* HeapReference<Class> */ temp = temp->component_type_
6513 __ movl(temp, Address(temp, component_offset));
6514 // If heap poisoning is enabled, no need to unpoison `temp`
6515 // nor the object reference in `register_value->klass`, as
6516 // we are comparing two poisoned references.
6517 __ cmpl(temp, Address(register_value, class_offset));
6518
6519 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6520 NearLabel do_put;
6521 __ j(kEqual, &do_put);
6522 // If heap poisoning is enabled, the `temp` reference has
6523 // not been unpoisoned yet; unpoison it now.
6524 __ MaybeUnpoisonHeapReference(temp);
6525
6526 // If heap poisoning is enabled, no need to unpoison the
6527 // heap reference loaded below, as it is only used for a
6528 // comparison with null.
6529 __ cmpl(Address(temp, super_offset), Immediate(0));
6530 __ j(kNotEqual, slow_path->GetEntryLabel());
6531 __ Bind(&do_put);
6532 } else {
6533 __ j(kNotEqual, slow_path->GetEntryLabel());
6534 }
6535 }
6536
6537 if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
6538 DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
6539 << " Already null checked so we shouldn't do it again.";
6540 Register card = locations->GetTemp(1).AsRegister<Register>();
6541 codegen_->MarkGCCard(temp,
6542 card,
6543 array,
6544 value.AsRegister<Register>(),
6545 /* emit_null_check= */ false);
6546 }
6547
6548 if (can_value_be_null) {
6549 DCHECK(do_store.IsLinked());
6550 __ Bind(&do_store);
6551 }
6552
6553 Register source = register_value;
6554 if (kPoisonHeapReferences) {
6555 __ movl(temp, register_value);
6556 __ PoisonHeapReference(temp);
6557 source = temp;
6558 }
6559
6560 __ movl(address, source);
6561
6562 if (can_value_be_null || !needs_type_check) {
6563 codegen_->MaybeRecordImplicitNullCheck(instruction);
6564 }
6565
6566 if (slow_path != nullptr) {
6567 __ Bind(slow_path->GetExitLabel());
6568 }
6569
6570 break;
6571 }
6572
6573 case DataType::Type::kInt32: {
6574 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6575 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6576 if (value.IsRegister()) {
6577 __ movl(address, value.AsRegister<Register>());
6578 } else {
6579 DCHECK(value.IsConstant()) << value;
6580 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6581 __ movl(address, Immediate(v));
6582 }
6583 codegen_->MaybeRecordImplicitNullCheck(instruction);
6584 break;
6585 }
6586
6587 case DataType::Type::kInt64: {
6588 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6589 if (value.IsRegisterPair()) {
6590 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6591 value.AsRegisterPairLow<Register>());
6592 codegen_->MaybeRecordImplicitNullCheck(instruction);
6593 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6594 value.AsRegisterPairHigh<Register>());
6595 } else {
6596 DCHECK(value.IsConstant());
6597 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6598 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6599 Immediate(Low32Bits(val)));
6600 codegen_->MaybeRecordImplicitNullCheck(instruction);
6601 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6602 Immediate(High32Bits(val)));
6603 }
6604 break;
6605 }
6606
6607 case DataType::Type::kFloat32: {
6608 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6609 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6610 if (value.IsFpuRegister()) {
6611 __ movss(address, value.AsFpuRegister<XmmRegister>());
6612 } else {
6613 DCHECK(value.IsConstant());
6614 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6615 __ movl(address, Immediate(v));
6616 }
6617 codegen_->MaybeRecordImplicitNullCheck(instruction);
6618 break;
6619 }
6620
6621 case DataType::Type::kFloat64: {
6622 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6623 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6624 if (value.IsFpuRegister()) {
6625 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6626 } else {
6627 DCHECK(value.IsConstant());
6628 Address address_hi =
6629 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6630 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6631 __ movl(address, Immediate(Low32Bits(v)));
6632 codegen_->MaybeRecordImplicitNullCheck(instruction);
6633 __ movl(address_hi, Immediate(High32Bits(v)));
6634 }
6635 break;
6636 }
6637
6638 case DataType::Type::kUint32:
6639 case DataType::Type::kUint64:
6640 case DataType::Type::kVoid:
6641 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6642 UNREACHABLE();
6643 }
6644 }
6645
6646 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6647 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6648 locations->SetInAt(0, Location::RequiresRegister());
6649 if (!instruction->IsEmittedAtUseSite()) {
6650 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6651 }
6652 }
6653
6654 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6655 if (instruction->IsEmittedAtUseSite()) {
6656 return;
6657 }
6658
6659 LocationSummary* locations = instruction->GetLocations();
6660 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6661 Register obj = locations->InAt(0).AsRegister<Register>();
6662 Register out = locations->Out().AsRegister<Register>();
6663 __ movl(out, Address(obj, offset));
6664 codegen_->MaybeRecordImplicitNullCheck(instruction);
6665 // Mask out most significant bit in case the array is String's array of char.
6666 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6667 __ shrl(out, Immediate(1));
6668 }
6669 }
6670
6671 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6672 RegisterSet caller_saves = RegisterSet::Empty();
6673 InvokeRuntimeCallingConvention calling_convention;
6674 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6675 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6676 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6677 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6678 HInstruction* length = instruction->InputAt(1);
6679 if (!length->IsEmittedAtUseSite()) {
6680 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6681 }
6682   // Need a register to load the array's length.
6683 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6684 locations->AddTemp(Location::RequiresRegister());
6685 }
6686 }
6687
6688 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6689 const bool is_string_compressed_char_at =
6690 mirror::kUseStringCompression && instruction->IsStringCharAt();
6691 LocationSummary* locations = instruction->GetLocations();
6692 Location index_loc = locations->InAt(0);
6693 Location length_loc = locations->InAt(1);
6694 SlowPathCode* slow_path =
6695 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6696
6697 if (length_loc.IsConstant()) {
6698 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6699 if (index_loc.IsConstant()) {
6700       // BCE will remove the bounds check if we are guaranteed to pass.
6701 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6702 if (index < 0 || index >= length) {
6703 codegen_->AddSlowPath(slow_path);
6704 __ jmp(slow_path->GetEntryLabel());
6705 } else {
6706 // Some optimization after BCE may have generated this, and we should not
6707 // generate a bounds check if it is a valid range.
6708 }
6709 return;
6710 }
6711
6712 // We have to reverse the jump condition because the length is the constant.
6713 Register index_reg = index_loc.AsRegister<Register>();
6714 __ cmpl(index_reg, Immediate(length));
6715 codegen_->AddSlowPath(slow_path);
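    // The unsigned comparison also catches a negative index, which wraps to a large value.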
6716 __ j(kAboveEqual, slow_path->GetEntryLabel());
6717 } else {
6718 HInstruction* array_length = instruction->InputAt(1);
6719 if (array_length->IsEmittedAtUseSite()) {
6720 // Address the length field in the array.
6721 DCHECK(array_length->IsArrayLength());
6722 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6723 Location array_loc = array_length->GetLocations()->InAt(0);
6724 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6725 if (is_string_compressed_char_at) {
6726 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6727 // the string compression flag) with the in-memory length and avoid the temporary.
6728 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6729 __ movl(length_reg, array_len);
6730 codegen_->MaybeRecordImplicitNullCheck(array_length);
6731 __ shrl(length_reg, Immediate(1));
6732 codegen_->GenerateIntCompare(length_reg, index_loc);
6733 } else {
6734 // Checking bounds for general case:
6735 // Array of char or string's array with feature compression off.
6736 if (index_loc.IsConstant()) {
6737 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6738 __ cmpl(array_len, Immediate(value));
6739 } else {
6740 __ cmpl(array_len, index_loc.AsRegister<Register>());
6741 }
6742 codegen_->MaybeRecordImplicitNullCheck(array_length);
6743 }
6744 } else {
6745 codegen_->GenerateIntCompare(length_loc, index_loc);
6746 }
6747 codegen_->AddSlowPath(slow_path);
6748 __ j(kBelowEqual, slow_path->GetEntryLabel());
6749 }
6750 }
6751
6752 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6753 LOG(FATAL) << "Unreachable";
6754 }
6755
6756 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6757 if (instruction->GetNext()->IsSuspendCheck() &&
6758 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6759 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6760 // The back edge will generate the suspend check.
6761 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6762 }
6763
6764 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6765 }
6766
6767 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6768 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6769 instruction, LocationSummary::kCallOnSlowPath);
6770 // In suspend check slow path, usually there are no caller-save registers at all.
6771 // If SIMD instructions are present, however, we force spilling all live SIMD
6772 // registers in full width (since the runtime only saves/restores lower part).
6773 locations->SetCustomSlowPathCallerSaves(
6774 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6775 }
6776
6777 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6778 HBasicBlock* block = instruction->GetBlock();
6779 if (block->GetLoopInformation() != nullptr) {
6780 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6781 // The back edge will generate the suspend check.
6782 return;
6783 }
6784 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6785 // The goto will generate the suspend check.
6786 return;
6787 }
6788 GenerateSuspendCheck(instruction, nullptr);
6789 }
6790
6791 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6792 HBasicBlock* successor) {
6793 SuspendCheckSlowPathX86* slow_path =
6794 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6795 if (slow_path == nullptr) {
6796 slow_path =
6797 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6798 instruction->SetSlowPath(slow_path);
6799 codegen_->AddSlowPath(slow_path);
6800 if (successor != nullptr) {
6801 DCHECK(successor->IsLoopHeader());
6802 }
6803 } else {
6804 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6805 }
6806
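  // Check the thread's flag word; a pending suspend or checkpoint request diverts execution
  // to the slow path. On a back edge, execution jumps to the successor only when no flag is set.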
6807 __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6808 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6809 if (successor == nullptr) {
6810 __ j(kNotZero, slow_path->GetEntryLabel());
6811 __ Bind(slow_path->GetReturnLabel());
6812 } else {
6813 __ j(kZero, codegen_->GetLabelOf(successor));
6814 __ jmp(slow_path->GetEntryLabel());
6815 }
6816 }
6817
6818 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6819 return codegen_->GetAssembler();
6820 }
6821
6822 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6823 ScratchRegisterScope ensure_scratch(
6824 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6825 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6826 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6827
6828 // Now that temp register is available (possibly spilled), move blocks of memory.
6829 for (int i = 0; i < number_of_words; i++) {
6830 __ movl(temp_reg, Address(ESP, src + stack_offset));
6831 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6832 stack_offset += kX86WordSize;
6833 }
6834 }
6835
6836 void ParallelMoveResolverX86::EmitMove(size_t index) {
6837 MoveOperands* move = moves_[index];
6838 Location source = move->GetSource();
6839 Location destination = move->GetDestination();
6840
6841 if (source.IsRegister()) {
6842 if (destination.IsRegister()) {
6843 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6844 } else if (destination.IsFpuRegister()) {
6845 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6846 } else {
6847 DCHECK(destination.IsStackSlot());
6848 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6849 }
6850 } else if (source.IsRegisterPair()) {
6851 if (destination.IsRegisterPair()) {
6852 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
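      // The DCHECK below guards the move ordering: writing the low half of the destination
      // must not have clobbered the high half of the source, which is still needed next.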
6853 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6854 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6855 } else if (destination.IsFpuRegister()) {
6856 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6857 // Push the 2 source registers to the stack.
6858 __ pushl(source.AsRegisterPairHigh<Register>());
6859 __ cfi().AdjustCFAOffset(elem_size);
6860 __ pushl(source.AsRegisterPairLow<Register>());
6861 __ cfi().AdjustCFAOffset(elem_size);
6862 // Load the destination register.
6863 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6864 // And remove the temporary stack space we allocated.
6865 codegen_->DecreaseFrame(2 * elem_size);
6866 } else {
6867 DCHECK(destination.IsDoubleStackSlot());
6868 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6869 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6870 source.AsRegisterPairHigh<Register>());
6871 }
6872 } else if (source.IsFpuRegister()) {
6873 if (destination.IsRegister()) {
6874 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6875 } else if (destination.IsFpuRegister()) {
6876 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6877 } else if (destination.IsRegisterPair()) {
6878 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6879 // Create stack space for 2 elements.
6880 codegen_->IncreaseFrame(2 * elem_size);
6881 // Store the source register.
6882 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6883 // And pop the values into destination registers.
6884 __ popl(destination.AsRegisterPairLow<Register>());
6885 __ cfi().AdjustCFAOffset(-elem_size);
6886 __ popl(destination.AsRegisterPairHigh<Register>());
6887 __ cfi().AdjustCFAOffset(-elem_size);
6888 } else if (destination.IsStackSlot()) {
6889 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6890 } else if (destination.IsDoubleStackSlot()) {
6891 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6892 } else {
6893 DCHECK(destination.IsSIMDStackSlot());
6894 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6895 }
6896 } else if (source.IsStackSlot()) {
6897 if (destination.IsRegister()) {
6898 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6899 } else if (destination.IsFpuRegister()) {
6900 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6901 } else {
6902 DCHECK(destination.IsStackSlot());
6903 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6904 }
6905 } else if (source.IsDoubleStackSlot()) {
6906 if (destination.IsRegisterPair()) {
6907 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6908 __ movl(destination.AsRegisterPairHigh<Register>(),
6909 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6910 } else if (destination.IsFpuRegister()) {
6911 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6912 } else {
6913 DCHECK(destination.IsDoubleStackSlot()) << destination;
6914 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6915 }
6916 } else if (source.IsSIMDStackSlot()) {
6917 if (destination.IsFpuRegister()) {
6918 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6919 } else {
6920 DCHECK(destination.IsSIMDStackSlot());
6921 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6922 }
6923 } else if (source.IsConstant()) {
6924 HConstant* constant = source.GetConstant();
6925 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6926 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6927 if (destination.IsRegister()) {
6928 if (value == 0) {
6929 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6930 } else {
6931 __ movl(destination.AsRegister<Register>(), Immediate(value));
6932 }
6933 } else {
6934 DCHECK(destination.IsStackSlot()) << destination;
6935 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6936 }
6937 } else if (constant->IsFloatConstant()) {
6938 float fp_value = constant->AsFloatConstant()->GetValue();
6939 int32_t value = bit_cast<int32_t, float>(fp_value);
6940 Immediate imm(value);
6941 if (destination.IsFpuRegister()) {
6942 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6943 if (value == 0) {
6944 // Easy handling of 0.0.
6945 __ xorps(dest, dest);
6946 } else {
6947 ScratchRegisterScope ensure_scratch(
6948 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6949 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6950 __ movl(temp, Immediate(value));
6951 __ movd(dest, temp);
6952 }
6953 } else {
6954 DCHECK(destination.IsStackSlot()) << destination;
6955 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6956 }
6957 } else if (constant->IsLongConstant()) {
6958 int64_t value = constant->AsLongConstant()->GetValue();
6959 int32_t low_value = Low32Bits(value);
6960 int32_t high_value = High32Bits(value);
6961 Immediate low(low_value);
6962 Immediate high(high_value);
6963 if (destination.IsDoubleStackSlot()) {
6964 __ movl(Address(ESP, destination.GetStackIndex()), low);
6965 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6966 } else {
6967 __ movl(destination.AsRegisterPairLow<Register>(), low);
6968 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6969 }
6970 } else {
6971 DCHECK(constant->IsDoubleConstant());
6972 double dbl_value = constant->AsDoubleConstant()->GetValue();
6973 int64_t value = bit_cast<int64_t, double>(dbl_value);
6974 int32_t low_value = Low32Bits(value);
6975 int32_t high_value = High32Bits(value);
6976 Immediate low(low_value);
6977 Immediate high(high_value);
6978 if (destination.IsFpuRegister()) {
6979 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6980 if (value == 0) {
6981 // Easy handling of 0.0.
6982 __ xorpd(dest, dest);
6983 } else {
6984 __ pushl(high);
6985 __ cfi().AdjustCFAOffset(4);
6986 __ pushl(low);
6987 __ cfi().AdjustCFAOffset(4);
6988 __ movsd(dest, Address(ESP, 0));
6989 codegen_->DecreaseFrame(8);
6990 }
6991 } else {
6992 DCHECK(destination.IsDoubleStackSlot()) << destination;
6993 __ movl(Address(ESP, destination.GetStackIndex()), low);
6994 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6995 }
6996 }
6997 } else {
6998 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6999 }
7000 }
7001
7002 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7003 Register suggested_scratch = reg == EAX ? EBX : EAX;
7004 ScratchRegisterScope ensure_scratch(
7005 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7006
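  // Three-instruction swap through a scratch register: load the stack word into the scratch,
  // store the GPR to the stack, then copy the scratch back into the GPR. If the scratch had
  // to be spilled, ESP moved down by one word, so the stack offset is adjusted to compensate.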
7007 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7008 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7009 __ movl(Address(ESP, mem + stack_offset), reg);
7010 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7011 }
7012
7013 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7014 ScratchRegisterScope ensure_scratch(
7015 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7016
7017 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7018 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7019 __ movl(temp_reg, Address(ESP, mem + stack_offset));
7020 __ movss(Address(ESP, mem + stack_offset), reg);
7021 __ movd(reg, temp_reg);
7022 }
7023
7024 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
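  // Spill the full 128-bit register into freshly reserved stack space, swap that temporary
  // slot with the target memory word by word, then reload the register from the temporary
  // slot, which now holds the original memory contents.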
7025 size_t extra_slot = 4 * kX86WordSize;
7026 codegen_->IncreaseFrame(extra_slot);
7027 __ movups(Address(ESP, 0), XmmRegister(reg));
7028 ExchangeMemory(0, mem + extra_slot, 4);
7029 __ movups(XmmRegister(reg), Address(ESP, 0));
7030 codegen_->DecreaseFrame(extra_slot);
7031 }
7032
7033 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7034 ScratchRegisterScope ensure_scratch1(
7035 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7036
7037 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7038 ScratchRegisterScope ensure_scratch2(
7039 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7040
7041 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7042 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
7043
7044 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7045 for (int i = 0; i < number_of_words; i++) {
7046 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7047 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7048 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7049 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7050 stack_offset += kX86WordSize;
7051 }
7052 }
7053
7054 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7055 MoveOperands* move = moves_[index];
7056 Location source = move->GetSource();
7057 Location destination = move->GetDestination();
7058
7059 if (source.IsRegister() && destination.IsRegister()) {
7060 // Use the XOR swap algorithm to avoid the serializing XCHG instruction or using a temporary.
7061 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
7062 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7063 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7064 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7065 } else if (source.IsRegister() && destination.IsStackSlot()) {
7066 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7067 } else if (source.IsStackSlot() && destination.IsRegister()) {
7068 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7069 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7070 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7071 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7072 // Use the XOR swap algorithm to avoid a temporary.
7073 DCHECK_NE(source.reg(), destination.reg());
7074 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7075 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7076 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7077 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7078 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7079 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7080 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7081 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7082 // Take advantage of the 16 bytes in the XMM register.
7083 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7084 Address stack(ESP, destination.GetStackIndex());
7085 // Load the stack slot's double into the high 64 bits of the XMM register.
7086 __ movhpd(reg, stack);
7087
7088 // Store the low double into the destination.
7089 __ movsd(stack, reg);
7090
7091 // Move the high double to the low double.
7092 __ psrldq(reg, Immediate(8));
7093 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7094 // Take advantage of the 16 bytes in the XMM register.
7095 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7096 Address stack(ESP, source.GetStackIndex());
7097 // Load the stack slot's double into the high 64 bits of the XMM register.
7098 __ movhpd(reg, stack);
7099
7100 // Store the low double into the destination.
7101 __ movsd(stack, reg);
7102
7103 // Move the high double to the low double.
7104 __ psrldq(reg, Immediate(8));
7105 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7106 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7107 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7108 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7109 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7110 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7111 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7112 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7113 } else {
7114 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7115 }
7116 }
7117
7118 void ParallelMoveResolverX86::SpillScratch(int reg) {
7119 __ pushl(static_cast<Register>(reg));
7120 }
7121
7122 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7123 __ popl(static_cast<Register>(reg));
7124 }
7125
7126 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7127 HLoadClass::LoadKind desired_class_load_kind) {
7128 switch (desired_class_load_kind) {
7129 case HLoadClass::LoadKind::kInvalid:
7130 LOG(FATAL) << "UNREACHABLE";
7131 UNREACHABLE();
7132 case HLoadClass::LoadKind::kReferrersClass:
7133 break;
7134 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7135 case HLoadClass::LoadKind::kBootImageRelRo:
7136 case HLoadClass::LoadKind::kBssEntry:
7137 case HLoadClass::LoadKind::kBssEntryPublic:
7138 case HLoadClass::LoadKind::kBssEntryPackage:
7139 DCHECK(!GetCompilerOptions().IsJitCompiler());
7140 break;
7141 case HLoadClass::LoadKind::kJitBootImageAddress:
7142 case HLoadClass::LoadKind::kJitTableAddress:
7143 DCHECK(GetCompilerOptions().IsJitCompiler());
7144 break;
7145 case HLoadClass::LoadKind::kRuntimeCall:
7146 break;
7147 }
7148 return desired_class_load_kind;
7149 }
7150
7151 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7152 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7153 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7154 InvokeRuntimeCallingConvention calling_convention;
7155 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7156 cls,
7157 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7158 Location::RegisterLocation(EAX));
7159 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7160 return;
7161 }
7162 DCHECK_EQ(cls->NeedsAccessCheck(),
7163 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7164 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7165
7166 const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
7167 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7168 ? LocationSummary::kCallOnSlowPath
7169 : LocationSummary::kNoCall;
7170 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7171 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7172 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7173 }
7174
7175 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7176 locations->SetInAt(0, Location::RequiresRegister());
7177 }
7178 locations->SetOut(Location::RequiresRegister());
7179 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7180 if (!gUseReadBarrier || kUseBakerReadBarrier) {
7181 // Rely on the type resolution and/or initialization to save everything.
7182 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7183 } else {
7184 // For non-Baker read barrier we have a temp-clobbering call.
7185 }
7186 }
7187 }
7188
7189 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7190 dex::TypeIndex type_index,
7191 Handle<mirror::Class> handle) {
7192 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7193 // Add a patch entry and return the label.
7194 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7195 PatchInfo<Label>* info = &jit_class_patches_.back();
7196 return &info->label;
7197 }
7198
7199 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7200 // move.
7201 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7202 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7203 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7204 codegen_->GenerateLoadClassRuntimeCall(cls);
7205 return;
7206 }
7207 DCHECK_EQ(cls->NeedsAccessCheck(),
7208 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7209 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7210
7211 LocationSummary* locations = cls->GetLocations();
7212 Location out_loc = locations->Out();
7213 Register out = out_loc.AsRegister<Register>();
7214
7215 bool generate_null_check = false;
7216 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7217 ? kWithoutReadBarrier
7218 : gCompilerReadBarrierOption;
7219 switch (load_kind) {
7220 case HLoadClass::LoadKind::kReferrersClass: {
7221 DCHECK(!cls->CanCallRuntime());
7222 DCHECK(!cls->MustGenerateClinitCheck());
7223 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7224 Register current_method = locations->InAt(0).AsRegister<Register>();
7225 GenerateGcRootFieldLoad(
7226 cls,
7227 out_loc,
7228 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7229 /* fixup_label= */ nullptr,
7230 read_barrier_option);
7231 break;
7232 }
7233 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7234 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7235 codegen_->GetCompilerOptions().IsBootImageExtension());
7236 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7237 Register method_address = locations->InAt(0).AsRegister<Register>();
7238 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7239 codegen_->RecordBootImageTypePatch(cls);
7240 break;
7241 }
7242 case HLoadClass::LoadKind::kBootImageRelRo: {
7243 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7244 Register method_address = locations->InAt(0).AsRegister<Register>();
7245 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7246 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7247 CodeGenerator::GetBootImageOffset(cls));
7248 break;
7249 }
7250 case HLoadClass::LoadKind::kBssEntry:
7251 case HLoadClass::LoadKind::kBssEntryPublic:
7252 case HLoadClass::LoadKind::kBssEntryPackage: {
7253 Register method_address = locations->InAt(0).AsRegister<Register>();
7254 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7255 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7256 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7257 // No need for memory fence, thanks to the x86 memory model.
7258 generate_null_check = true;
7259 break;
7260 }
7261 case HLoadClass::LoadKind::kJitBootImageAddress: {
7262 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7263 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7264 DCHECK_NE(address, 0u);
7265 __ movl(out, Immediate(address));
7266 break;
7267 }
7268 case HLoadClass::LoadKind::kJitTableAddress: {
7269 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7270 Label* fixup_label = codegen_->NewJitRootClassPatch(
7271 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7272 // /* GcRoot<mirror::Class> */ out = *address
7273 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7274 break;
7275 }
7276 case HLoadClass::LoadKind::kRuntimeCall:
7277 case HLoadClass::LoadKind::kInvalid:
7278 LOG(FATAL) << "UNREACHABLE";
7279 UNREACHABLE();
7280 }
7281
7282 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7283 DCHECK(cls->CanCallRuntime());
7284 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7285 codegen_->AddSlowPath(slow_path);
7286
7287 if (generate_null_check) {
7288 __ testl(out, out);
7289 __ j(kEqual, slow_path->GetEntryLabel());
7290 }
7291
7292 if (cls->MustGenerateClinitCheck()) {
7293 GenerateClassInitializationCheck(slow_path, out);
7294 } else {
7295 __ Bind(slow_path->GetExitLabel());
7296 }
7297 }
7298 }
7299
7300 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7301 InvokeRuntimeCallingConvention calling_convention;
7302 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7303 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7304 }
7305
7306 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7307 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7308 }
7309
7310 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7311 InvokeRuntimeCallingConvention calling_convention;
7312 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7313 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7314 }
7315
7316 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7317 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7318 }
7319
7320 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7321 LocationSummary* locations =
7322 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7323 locations->SetInAt(0, Location::RequiresRegister());
7324 if (check->HasUses()) {
7325 locations->SetOut(Location::SameAsFirstInput());
7326 }
7327 // Rely on the type initialization to save everything we need.
7328 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7329 }
7330
7331 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7332 // We assume the class is not null.
7333 SlowPathCode* slow_path =
7334 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7335 codegen_->AddSlowPath(slow_path);
7336 GenerateClassInitializationCheck(slow_path,
7337 check->GetLocations()->InAt(0).AsRegister<Register>());
7338 }
7339
7340 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7341 SlowPathCode* slow_path, Register class_reg) {
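  // A single unsigned byte compare against the (shifted) visibly-initialized marker decides
  // whether the class is initialized; any lower status value branches to the slow path,
  // which initializes the class in the runtime.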
7342 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
7343 __ j(kBelow, slow_path->GetEntryLabel());
7344 __ Bind(slow_path->GetExitLabel());
7345 }
7346
7347 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7348 Register temp) {
7349 uint32_t path_to_root = check->GetBitstringPathToRoot();
7350 uint32_t mask = check->GetBitstringMask();
7351 DCHECK(IsPowerOfTwo(mask + 1));
7352 size_t mask_bits = WhichPowerOf2(mask + 1);
7353
7354 if (mask_bits == 16u) {
7355 // Compare the bitstring in memory.
7356 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7357 } else {
7358 // /* uint32_t */ temp = temp->status_
7359 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7360 // Compare the bitstring bits using SUB.
7361 __ subl(temp, Immediate(path_to_root));
7362 // Shift out bits that do not contribute to the comparison.
7363 __ shll(temp, Immediate(32u - mask_bits));
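    // After the subtract and shift, the zero flag is set iff the masked bits matched
    // `path_to_root`, so callers can branch on kEqual/kNotEqual just as after the cmpw above.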
7364 }
7365 }
7366
7367 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7368 HLoadString::LoadKind desired_string_load_kind) {
7369 switch (desired_string_load_kind) {
7370 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7371 case HLoadString::LoadKind::kBootImageRelRo:
7372 case HLoadString::LoadKind::kBssEntry:
7373 DCHECK(!GetCompilerOptions().IsJitCompiler());
7374 break;
7375 case HLoadString::LoadKind::kJitBootImageAddress:
7376 case HLoadString::LoadKind::kJitTableAddress:
7377 DCHECK(GetCompilerOptions().IsJitCompiler());
7378 break;
7379 case HLoadString::LoadKind::kRuntimeCall:
7380 break;
7381 }
7382 return desired_string_load_kind;
7383 }
7384
7385 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7386 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7387 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7388 HLoadString::LoadKind load_kind = load->GetLoadKind();
7389 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7390 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7391 load_kind == HLoadString::LoadKind::kBssEntry) {
7392 locations->SetInAt(0, Location::RequiresRegister());
7393 }
7394 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7395 locations->SetOut(Location::RegisterLocation(EAX));
7396 } else {
7397 locations->SetOut(Location::RequiresRegister());
7398 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7399 if (!gUseReadBarrier || kUseBakerReadBarrier) {
7400 // Rely on the pResolveString to save everything.
7401 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7402 } else {
7403 // For non-Baker read barrier we have a temp-clobbering call.
7404 }
7405 }
7406 }
7407 }
7408
7409 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7410 dex::StringIndex string_index,
7411 Handle<mirror::String> handle) {
7412 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7413 // Add a patch entry and return the label.
7414 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7415 PatchInfo<Label>* info = &jit_string_patches_.back();
7416 return &info->label;
7417 }
7418
7419 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7420 // move.
7421 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7422 LocationSummary* locations = load->GetLocations();
7423 Location out_loc = locations->Out();
7424 Register out = out_loc.AsRegister<Register>();
7425
7426 switch (load->GetLoadKind()) {
7427 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7428 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7429 codegen_->GetCompilerOptions().IsBootImageExtension());
7430 Register method_address = locations->InAt(0).AsRegister<Register>();
7431 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7432 codegen_->RecordBootImageStringPatch(load);
7433 return;
7434 }
7435 case HLoadString::LoadKind::kBootImageRelRo: {
7436 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7437 Register method_address = locations->InAt(0).AsRegister<Register>();
7438 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7439 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7440 CodeGenerator::GetBootImageOffset(load));
7441 return;
7442 }
7443 case HLoadString::LoadKind::kBssEntry: {
7444 Register method_address = locations->InAt(0).AsRegister<Register>();
7445 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7446 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7447 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7448 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
7449 // No need for memory fence, thanks to the x86 memory model.
7450 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7451 codegen_->AddSlowPath(slow_path);
7452 __ testl(out, out);
7453 __ j(kEqual, slow_path->GetEntryLabel());
7454 __ Bind(slow_path->GetExitLabel());
7455 return;
7456 }
7457 case HLoadString::LoadKind::kJitBootImageAddress: {
7458 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7459 DCHECK_NE(address, 0u);
7460 __ movl(out, Immediate(address));
7461 return;
7462 }
7463 case HLoadString::LoadKind::kJitTableAddress: {
7464 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7465 Label* fixup_label = codegen_->NewJitRootStringPatch(
7466 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7467 // /* GcRoot<mirror::String> */ out = *address
7468 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
7469 return;
7470 }
7471 default:
7472 break;
7473 }
7474
7475 // TODO: Re-add the compiler code to do the string dex cache lookup.
7476 InvokeRuntimeCallingConvention calling_convention;
7477 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7478 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7479 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7480 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7481 }
7482
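// The pending exception is stored in a Thread field; x86 ART reaches thread-local state
// through the %fs segment register, so callers pair this address with the fs() override.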
7483 static Address GetExceptionTlsAddress() {
7484 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7485 }
7486
7487 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7488 LocationSummary* locations =
7489 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7490 locations->SetOut(Location::RequiresRegister());
7491 }
7492
7493 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7494 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7495 }
7496
7497 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7498 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7499 }
7500
7501 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7502 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7503 }
7504
7505 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7506 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7507 instruction, LocationSummary::kCallOnMainOnly);
7508 InvokeRuntimeCallingConvention calling_convention;
7509 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7510 }
7511
7512 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7513 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7514 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7515 }
7516
7517 // Temp is used for read barrier.
7518 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7519 if (gUseReadBarrier &&
7520 !kUseBakerReadBarrier &&
7521 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7522 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7523 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7524 return 1;
7525 }
7526 return 0;
7527 }
7528
7529 // The interface check case has 2 temps: one holds the number of interfaces and one holds the
7530 // current interface pointer; the current interface is compared in memory.
7531 // The other checks have one temp for loading the object's class.
7532 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7533 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7534 return 2;
7535 }
7536 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7537 }
7538
7539 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7540 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7541 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7542 bool baker_read_barrier_slow_path = false;
7543 switch (type_check_kind) {
7544 case TypeCheckKind::kExactCheck:
7545 case TypeCheckKind::kAbstractClassCheck:
7546 case TypeCheckKind::kClassHierarchyCheck:
7547 case TypeCheckKind::kArrayObjectCheck: {
7548 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7549 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7550 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7551 break;
7552 }
7553 case TypeCheckKind::kArrayCheck:
7554 case TypeCheckKind::kUnresolvedCheck:
7555 case TypeCheckKind::kInterfaceCheck:
7556 call_kind = LocationSummary::kCallOnSlowPath;
7557 break;
7558 case TypeCheckKind::kBitstringCheck:
7559 break;
7560 }
7561
7562 LocationSummary* locations =
7563 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7564 if (baker_read_barrier_slow_path) {
7565 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7566 }
7567 locations->SetInAt(0, Location::RequiresRegister());
7568 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7569 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7570 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7571 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7572 } else {
7573 locations->SetInAt(1, Location::Any());
7574 }
7575 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7576 locations->SetOut(Location::RequiresRegister());
7577 // When read barriers are enabled, we need a temporary register for some cases.
7578 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7579 }
7580
7581 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7582 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7583 LocationSummary* locations = instruction->GetLocations();
7584 Location obj_loc = locations->InAt(0);
7585 Register obj = obj_loc.AsRegister<Register>();
7586 Location cls = locations->InAt(1);
7587 Location out_loc = locations->Out();
7588 Register out = out_loc.AsRegister<Register>();
7589 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7590 DCHECK_LE(num_temps, 1u);
7591 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7592 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7593 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7594 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7595 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7596 SlowPathCode* slow_path = nullptr;
7597 NearLabel done, zero;
7598
7599 // Return 0 if `obj` is null.
7600 // Avoid null check if we know obj is not null.
7601 if (instruction->MustDoNullCheck()) {
7602 __ testl(obj, obj);
7603 __ j(kEqual, &zero);
7604 }
7605
7606 switch (type_check_kind) {
7607 case TypeCheckKind::kExactCheck: {
7608 ReadBarrierOption read_barrier_option =
7609 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7610 // /* HeapReference<Class> */ out = obj->klass_
7611 GenerateReferenceLoadTwoRegisters(instruction,
7612 out_loc,
7613 obj_loc,
7614 class_offset,
7615 read_barrier_option);
7616 if (cls.IsRegister()) {
7617 __ cmpl(out, cls.AsRegister<Register>());
7618 } else {
7619 DCHECK(cls.IsStackSlot()) << cls;
7620 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7621 }
7622
7623 // Classes must be equal for the instanceof to succeed.
7624 __ j(kNotEqual, &zero);
7625 __ movl(out, Immediate(1));
7626 __ jmp(&done);
7627 break;
7628 }
7629
7630 case TypeCheckKind::kAbstractClassCheck: {
7631 ReadBarrierOption read_barrier_option =
7632 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7633 // /* HeapReference<Class> */ out = obj->klass_
7634 GenerateReferenceLoadTwoRegisters(instruction,
7635 out_loc,
7636 obj_loc,
7637 class_offset,
7638 read_barrier_option);
7639 // If the class is abstract, we eagerly fetch the super class of the
7640 // object to avoid doing a comparison we know will fail.
7641 NearLabel loop;
7642 __ Bind(&loop);
7643 // /* HeapReference<Class> */ out = out->super_class_
7644 GenerateReferenceLoadOneRegister(instruction,
7645 out_loc,
7646 super_offset,
7647 maybe_temp_loc,
7648 read_barrier_option);
7649 __ testl(out, out);
7650 // If `out` is null, we use it for the result, and jump to `done`.
7651 __ j(kEqual, &done);
7652 if (cls.IsRegister()) {
7653 __ cmpl(out, cls.AsRegister<Register>());
7654 } else {
7655 DCHECK(cls.IsStackSlot()) << cls;
7656 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7657 }
7658 __ j(kNotEqual, &loop);
7659 __ movl(out, Immediate(1));
7660 if (zero.IsLinked()) {
7661 __ jmp(&done);
7662 }
7663 break;
7664 }
7665
7666 case TypeCheckKind::kClassHierarchyCheck: {
7667 ReadBarrierOption read_barrier_option =
7668 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7669 // /* HeapReference<Class> */ out = obj->klass_
7670 GenerateReferenceLoadTwoRegisters(instruction,
7671 out_loc,
7672 obj_loc,
7673 class_offset,
7674 read_barrier_option);
7675 // Walk over the class hierarchy to find a match.
7676 NearLabel loop, success;
7677 __ Bind(&loop);
7678 if (cls.IsRegister()) {
7679 __ cmpl(out, cls.AsRegister<Register>());
7680 } else {
7681 DCHECK(cls.IsStackSlot()) << cls;
7682 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7683 }
7684 __ j(kEqual, &success);
7685 // /* HeapReference<Class> */ out = out->super_class_
7686 GenerateReferenceLoadOneRegister(instruction,
7687 out_loc,
7688 super_offset,
7689 maybe_temp_loc,
7690 read_barrier_option);
7691 __ testl(out, out);
7692 __ j(kNotEqual, &loop);
7693 // If `out` is null, we use it for the result, and jump to `done`.
7694 __ jmp(&done);
7695 __ Bind(&success);
7696 __ movl(out, Immediate(1));
7697 if (zero.IsLinked()) {
7698 __ jmp(&done);
7699 }
7700 break;
7701 }
7702
7703 case TypeCheckKind::kArrayObjectCheck: {
7704 ReadBarrierOption read_barrier_option =
7705 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7706 // /* HeapReference<Class> */ out = obj->klass_
7707 GenerateReferenceLoadTwoRegisters(instruction,
7708 out_loc,
7709 obj_loc,
7710 class_offset,
7711 read_barrier_option);
7712 // Do an exact check.
7713 NearLabel exact_check;
7714 if (cls.IsRegister()) {
7715 __ cmpl(out, cls.AsRegister<Register>());
7716 } else {
7717 DCHECK(cls.IsStackSlot()) << cls;
7718 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7719 }
7720 __ j(kEqual, &exact_check);
7721 // Otherwise, we need to check that the object's class is a non-primitive array.
7722 // /* HeapReference<Class> */ out = out->component_type_
7723 GenerateReferenceLoadOneRegister(instruction,
7724 out_loc,
7725 component_offset,
7726 maybe_temp_loc,
7727 read_barrier_option);
7728 __ testl(out, out);
7729 // If `out` is null, we use it for the result, and jump to `done`.
7730 __ j(kEqual, &done);
7731 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7732 __ j(kNotEqual, &zero);
7733 __ Bind(&exact_check);
7734 __ movl(out, Immediate(1));
7735 __ jmp(&done);
7736 break;
7737 }
7738
7739 case TypeCheckKind::kArrayCheck: {
7740 // No read barrier since the slow path will retry upon failure.
7741 // /* HeapReference<Class> */ out = obj->klass_
7742 GenerateReferenceLoadTwoRegisters(instruction,
7743 out_loc,
7744 obj_loc,
7745 class_offset,
7746 kWithoutReadBarrier);
7747 if (cls.IsRegister()) {
7748 __ cmpl(out, cls.AsRegister<Register>());
7749 } else {
7750 DCHECK(cls.IsStackSlot()) << cls;
7751 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7752 }
7753 DCHECK(locations->OnlyCallsOnSlowPath());
7754 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7755 instruction, /* is_fatal= */ false);
7756 codegen_->AddSlowPath(slow_path);
7757 __ j(kNotEqual, slow_path->GetEntryLabel());
7758 __ movl(out, Immediate(1));
7759 if (zero.IsLinked()) {
7760 __ jmp(&done);
7761 }
7762 break;
7763 }
7764
7765 case TypeCheckKind::kUnresolvedCheck:
7766 case TypeCheckKind::kInterfaceCheck: {
7767 // Note that we indeed only call on slow path, but we always go
7768 // into the slow path for the unresolved and interface check
7769 // cases.
7770 //
7771 // We cannot directly call the InstanceofNonTrivial runtime
7772 // entry point without resorting to a type checking slow path
7773 // here (i.e. by calling InvokeRuntime directly), as it would
7774 // require to assign fixed registers for the inputs of this
7775 // HInstanceOf instruction (following the runtime calling
7776 // convention), which might be cluttered by the potential first
7777 // read barrier emission at the beginning of this method.
7778 //
7779 // TODO: Introduce a new runtime entry point taking the object
7780 // to test (instead of its class) as argument, and let it deal
7781 // with the read barrier issues. This will let us refactor this
7782 // case of the `switch` code as it was previously (with a direct
7783 // call to the runtime not using a type checking slow path).
7784 // This should also be beneficial for the other cases above.
7785 DCHECK(locations->OnlyCallsOnSlowPath());
7786 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7787 instruction, /* is_fatal= */ false);
7788 codegen_->AddSlowPath(slow_path);
7789 __ jmp(slow_path->GetEntryLabel());
7790 if (zero.IsLinked()) {
7791 __ jmp(&done);
7792 }
7793 break;
7794 }
7795
7796 case TypeCheckKind::kBitstringCheck: {
7797 // /* HeapReference<Class> */ temp = obj->klass_
7798 GenerateReferenceLoadTwoRegisters(instruction,
7799 out_loc,
7800 obj_loc,
7801 class_offset,
7802 kWithoutReadBarrier);
7803
7804 GenerateBitstringTypeCheckCompare(instruction, out);
7805 __ j(kNotEqual, &zero);
7806 __ movl(out, Immediate(1));
7807 __ jmp(&done);
7808 break;
7809 }
7810 }
7811
7812 if (zero.IsLinked()) {
7813 __ Bind(&zero);
7814 __ xorl(out, out);
7815 }
7816
7817 if (done.IsLinked()) {
7818 __ Bind(&done);
7819 }
7820
7821 if (slow_path != nullptr) {
7822 __ Bind(slow_path->GetExitLabel());
7823 }
7824 }
7825
7826 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7827 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7828 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7829 LocationSummary* locations =
7830 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7831 locations->SetInAt(0, Location::RequiresRegister());
7832 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7833 // Require a register for the interface check since there is a loop that compares the class to
7834 // a memory address.
7835 locations->SetInAt(1, Location::RequiresRegister());
7836 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7837 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7838 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7839 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7840 } else {
7841 locations->SetInAt(1, Location::Any());
7842 }
7843 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7844 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7845 }
7846
7847 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7848 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7849 LocationSummary* locations = instruction->GetLocations();
7850 Location obj_loc = locations->InAt(0);
7851 Register obj = obj_loc.AsRegister<Register>();
7852 Location cls = locations->InAt(1);
7853 Location temp_loc = locations->GetTemp(0);
7854 Register temp = temp_loc.AsRegister<Register>();
7855 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7856 DCHECK_GE(num_temps, 1u);
7857 DCHECK_LE(num_temps, 2u);
7858 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7859 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7860 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7861 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7862 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7863 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7864 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7865 const uint32_t object_array_data_offset =
7866 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7867
7868 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7869 SlowPathCode* type_check_slow_path =
7870 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7871 instruction, is_type_check_slow_path_fatal);
7872 codegen_->AddSlowPath(type_check_slow_path);
7873
7874 NearLabel done;
7875 // Avoid null check if we know obj is not null.
7876 if (instruction->MustDoNullCheck()) {
7877 __ testl(obj, obj);
7878 __ j(kEqual, &done);
7879 }
7880
7881 switch (type_check_kind) {
7882 case TypeCheckKind::kExactCheck:
7883 case TypeCheckKind::kArrayCheck: {
7884 // /* HeapReference<Class> */ temp = obj->klass_
7885 GenerateReferenceLoadTwoRegisters(instruction,
7886 temp_loc,
7887 obj_loc,
7888 class_offset,
7889 kWithoutReadBarrier);
7890
7891 if (cls.IsRegister()) {
7892 __ cmpl(temp, cls.AsRegister<Register>());
7893 } else {
7894 DCHECK(cls.IsStackSlot()) << cls;
7895 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7896 }
7897 // Jump to slow path for throwing the exception or doing a
7898 // more involved array check.
7899 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7900 break;
7901 }
7902
7903 case TypeCheckKind::kAbstractClassCheck: {
7904 // /* HeapReference<Class> */ temp = obj->klass_
7905 GenerateReferenceLoadTwoRegisters(instruction,
7906 temp_loc,
7907 obj_loc,
7908 class_offset,
7909 kWithoutReadBarrier);
7910
7911 // If the class is abstract, we eagerly fetch the super class of the
7912 // object to avoid doing a comparison we know will fail.
7913 NearLabel loop;
7914 __ Bind(&loop);
7915 // /* HeapReference<Class> */ temp = temp->super_class_
7916 GenerateReferenceLoadOneRegister(instruction,
7917 temp_loc,
7918 super_offset,
7919 maybe_temp2_loc,
7920 kWithoutReadBarrier);
7921
7922 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7923 // exception.
7924 __ testl(temp, temp);
7925 __ j(kZero, type_check_slow_path->GetEntryLabel());
7926
7927 // Otherwise, compare the classes.
7928 if (cls.IsRegister()) {
7929 __ cmpl(temp, cls.AsRegister<Register>());
7930 } else {
7931 DCHECK(cls.IsStackSlot()) << cls;
7932 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7933 }
7934 __ j(kNotEqual, &loop);
7935 break;
7936 }
7937
7938 case TypeCheckKind::kClassHierarchyCheck: {
7939 // /* HeapReference<Class> */ temp = obj->klass_
7940 GenerateReferenceLoadTwoRegisters(instruction,
7941 temp_loc,
7942 obj_loc,
7943 class_offset,
7944 kWithoutReadBarrier);
7945
7946 // Walk over the class hierarchy to find a match.
7947 NearLabel loop;
7948 __ Bind(&loop);
7949 if (cls.IsRegister()) {
7950 __ cmpl(temp, cls.AsRegister<Register>());
7951 } else {
7952 DCHECK(cls.IsStackSlot()) << cls;
7953 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7954 }
7955 __ j(kEqual, &done);
7956
7957 // /* HeapReference<Class> */ temp = temp->super_class_
7958 GenerateReferenceLoadOneRegister(instruction,
7959 temp_loc,
7960 super_offset,
7961 maybe_temp2_loc,
7962 kWithoutReadBarrier);
7963
7964 // If the class reference currently in `temp` is not null, jump
7965 // back to the beginning of the loop.
7966 __ testl(temp, temp);
7967 __ j(kNotZero, &loop);
7968 // Otherwise, jump to the slow path to throw the exception.
7969 __ jmp(type_check_slow_path->GetEntryLabel());
7970 break;
7971 }
7972
7973 case TypeCheckKind::kArrayObjectCheck: {
7974 // /* HeapReference<Class> */ temp = obj->klass_
7975 GenerateReferenceLoadTwoRegisters(instruction,
7976 temp_loc,
7977 obj_loc,
7978 class_offset,
7979 kWithoutReadBarrier);
7980
7981 // Do an exact check.
7982 if (cls.IsRegister()) {
7983 __ cmpl(temp, cls.AsRegister<Register>());
7984 } else {
7985 DCHECK(cls.IsStackSlot()) << cls;
7986 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7987 }
7988 __ j(kEqual, &done);
7989
7990 // Otherwise, we need to check that the object's class is a non-primitive array.
7991 // /* HeapReference<Class> */ temp = temp->component_type_
7992 GenerateReferenceLoadOneRegister(instruction,
7993 temp_loc,
7994 component_offset,
7995 maybe_temp2_loc,
7996 kWithoutReadBarrier);
7997
7998 // If the component type is null (i.e. the object is not an array), jump to the slow path to
7999 // throw the exception. Otherwise proceed with the check.
8000 __ testl(temp, temp);
8001 __ j(kZero, type_check_slow_path->GetEntryLabel());
8002
8003 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8004 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8005 break;
8006 }
8007
8008 case TypeCheckKind::kUnresolvedCheck:
8009 // We always go into the type check slow path for the unresolved check case.
8010 // We cannot directly call the CheckCast runtime entry point
8011 // without resorting to a type checking slow path here (i.e. by
8012 // calling InvokeRuntime directly), as it would require to
8013 // assign fixed registers for the inputs of this HInstanceOf
8014 // instruction (following the runtime calling convention), which
8015 // might be cluttered by the potential first read barrier
8016 // emission at the beginning of this method.
8017 __ jmp(type_check_slow_path->GetEntryLabel());
8018 break;
8019
8020 case TypeCheckKind::kInterfaceCheck: {
8021 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
8022 // We cannot get false positives by doing this.
8023 // /* HeapReference<Class> */ temp = obj->klass_
8024 GenerateReferenceLoadTwoRegisters(instruction,
8025 temp_loc,
8026 obj_loc,
8027 class_offset,
8028 kWithoutReadBarrier);
8029
8030 // /* HeapReference<Class> */ temp = temp->iftable_
8031 GenerateReferenceLoadTwoRegisters(instruction,
8032 temp_loc,
8033 temp_loc,
8034 iftable_offset,
8035 kWithoutReadBarrier);
8036 // Iftable is never null.
8037 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
8038 // Maybe poison the `cls` for direct comparison with memory.
8039 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8040 // Loop through the iftable and check if any class matches.
8041 NearLabel start_loop;
8042 __ Bind(&start_loop);
8043 // Need to subtract first to handle the empty array case.
8044 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8045 __ j(kNegative, type_check_slow_path->GetEntryLabel());
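      // Each iftable entry is an (interface class, method array) pair, hence the stride of 2;
      // the comparison below addresses the class element of the current pair.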
8046 // Go to next interface if the classes do not match.
8047 __ cmpl(cls.AsRegister<Register>(),
8048 CodeGeneratorX86::ArrayAddress(temp,
8049 maybe_temp2_loc,
8050 TIMES_4,
8051 object_array_data_offset));
8052 __ j(kNotEqual, &start_loop);
8053 // If `cls` was poisoned above, unpoison it.
8054 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8055 break;
8056 }
8057
8058 case TypeCheckKind::kBitstringCheck: {
8059 // /* HeapReference<Class> */ temp = obj->klass_
8060 GenerateReferenceLoadTwoRegisters(instruction,
8061 temp_loc,
8062 obj_loc,
8063 class_offset,
8064 kWithoutReadBarrier);
8065
8066 GenerateBitstringTypeCheckCompare(instruction, temp);
8067 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8068 break;
8069 }
8070 }
8071 __ Bind(&done);
8072
8073 __ Bind(type_check_slow_path->GetExitLabel());
8074 }
8075
8076 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8077 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8078 instruction, LocationSummary::kCallOnMainOnly);
8079 InvokeRuntimeCallingConvention calling_convention;
8080 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8081 }
8082
8083 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8084 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8085 : kQuickUnlockObject,
8086 instruction,
8087 instruction->GetDexPc());
8088 if (instruction->IsEnter()) {
8089 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8090 } else {
8091 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8092 }
8093 }
8094
8095 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8096 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8097 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8098 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8099 locations->SetInAt(0, Location::RequiresRegister());
8100 locations->SetInAt(1, Location::RequiresRegister());
8101 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8102 }
8103
8104 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8105 LocationSummary* locations = instruction->GetLocations();
8106 Location first = locations->InAt(0);
8107 Location second = locations->InAt(1);
8108 Location dest = locations->Out();
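  // The BMI `andn` instruction computes dest = ~src1 & src2 non-destructively in one step;
  // for 64-bit values it is simply applied to the low and high register-pair halves.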
8109 if (instruction->GetResultType() == DataType::Type::kInt32) {
8110 __ andn(dest.AsRegister<Register>(),
8111 first.AsRegister<Register>(),
8112 second.AsRegister<Register>());
8113 } else {
8114 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8115 __ andn(dest.AsRegisterPairLow<Register>(),
8116 first.AsRegisterPairLow<Register>(),
8117 second.AsRegisterPairLow<Register>());
8118 __ andn(dest.AsRegisterPairHigh<Register>(),
8119 first.AsRegisterPairHigh<Register>(),
8120 second.AsRegisterPairHigh<Register>());
8121 }
8122 }
8123
8124 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8125 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8126 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8127 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8128 locations->SetInAt(0, Location::RequiresRegister());
8129 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8130 }
8131
8132 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8133 HX86MaskOrResetLeastSetBit* instruction) {
8134 LocationSummary* locations = instruction->GetLocations();
8135 Location src = locations->InAt(0);
8136 Location dest = locations->Out();
8137 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
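  // BMI1 single-operand forms: blsr resets the lowest set bit (x & (x - 1)), while blsmsk
  // builds a mask up to and including the lowest set bit (x ^ (x - 1)).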
8138 switch (instruction->GetOpKind()) {
8139 case HInstruction::kAnd:
8140 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8141 break;
8142 case HInstruction::kXor:
8143 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8144 break;
8145 default:
8146 LOG(FATAL) << "Unreachable";
8147 }
8148 }
8149
8150 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8151 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8152 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8153
8154 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8155 LocationSummary* locations =
8156 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8157 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8158 || instruction->GetResultType() == DataType::Type::kInt64);
8159 locations->SetInAt(0, Location::RequiresRegister());
8160 locations->SetInAt(1, Location::Any());
8161 locations->SetOut(Location::SameAsFirstInput());
8162 }
8163
8164 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8165 HandleBitwiseOperation(instruction);
8166 }
8167
8168 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8169 HandleBitwiseOperation(instruction);
8170 }
8171
8172 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8173 HandleBitwiseOperation(instruction);
8174 }
8175
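// Note: the two-operand x86 instruction forms are destructive, which is why the locations
// above constrain the output to be the same as the first input. Int64 operands are processed
// as independent low/high 32-bit halves, since x86-32 has no 64-bit general-purpose registers.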
8176 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8177 LocationSummary* locations = instruction->GetLocations();
8178 Location first = locations->InAt(0);
8179 Location second = locations->InAt(1);
8180 DCHECK(first.Equals(locations->Out()));
8181
8182 if (instruction->GetResultType() == DataType::Type::kInt32) {
8183 if (second.IsRegister()) {
8184 if (instruction->IsAnd()) {
8185 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8186 } else if (instruction->IsOr()) {
8187 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8188 } else {
8189 DCHECK(instruction->IsXor());
8190 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8191 }
8192 } else if (second.IsConstant()) {
8193 if (instruction->IsAnd()) {
8194 __ andl(first.AsRegister<Register>(),
8195 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8196 } else if (instruction->IsOr()) {
8197 __ orl(first.AsRegister<Register>(),
8198 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8199 } else {
8200 DCHECK(instruction->IsXor());
8201 __ xorl(first.AsRegister<Register>(),
8202 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8203 }
8204 } else {
8205 if (instruction->IsAnd()) {
8206 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8207 } else if (instruction->IsOr()) {
8208 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8209 } else {
8210 DCHECK(instruction->IsXor());
8211 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8212 }
8213 }
8214 } else {
8215 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8216 if (second.IsRegisterPair()) {
8217 if (instruction->IsAnd()) {
8218 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8219 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8220 } else if (instruction->IsOr()) {
8221 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8222 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8223 } else {
8224 DCHECK(instruction->IsXor());
8225 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8226 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8227 }
8228 } else if (second.IsDoubleStackSlot()) {
8229 if (instruction->IsAnd()) {
8230 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8231 __ andl(first.AsRegisterPairHigh<Register>(),
8232 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8233 } else if (instruction->IsOr()) {
8234 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8235 __ orl(first.AsRegisterPairHigh<Register>(),
8236 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8237 } else {
8238 DCHECK(instruction->IsXor());
8239 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8240 __ xorl(first.AsRegisterPairHigh<Register>(),
8241 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8242 }
8243 } else {
8244 DCHECK(second.IsConstant()) << second;
8245 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8246 int32_t low_value = Low32Bits(value);
8247 int32_t high_value = High32Bits(value);
8248 Immediate low(low_value);
8249 Immediate high(high_value);
8250 Register first_low = first.AsRegisterPairLow<Register>();
8251 Register first_high = first.AsRegisterPairHigh<Register>();
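      // Skip redundant work for trivial constant halves: AND with 0 just zeroes the register
      // (using the shorter xorl encoding), while AND with -1 and OR/XOR with 0 are no-ops
      // and emit nothing.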
8252 if (instruction->IsAnd()) {
8253 if (low_value == 0) {
8254 __ xorl(first_low, first_low);
8255 } else if (low_value != -1) {
8256 __ andl(first_low, low);
8257 }
8258 if (high_value == 0) {
8259 __ xorl(first_high, first_high);
8260 } else if (high_value != -1) {
8261 __ andl(first_high, high);
8262 }
8263 } else if (instruction->IsOr()) {
8264 if (low_value != 0) {
8265 __ orl(first_low, low);
8266 }
8267 if (high_value != 0) {
8268 __ orl(first_high, high);
8269 }
8270 } else {
8271 DCHECK(instruction->IsXor());
8272 if (low_value != 0) {
8273 __ xorl(first_low, low);
8274 }
8275 if (high_value != 0) {
8276 __ xorl(first_high, high);
8277 }
8278 }
8279 }
8280 }
8281 }
8282
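// Note: the "one register" variant below loads through `out` itself (out = *(out + offset)),
// so for non-Baker read barriers the original reference must first be saved in `maybe_temp`.
// The "two registers" variant that follows loads from a separate holder register `obj` and
// therefore needs no temporary.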
8283 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8284 HInstruction* instruction,
8285 Location out,
8286 uint32_t offset,
8287 Location maybe_temp,
8288 ReadBarrierOption read_barrier_option) {
8289 Register out_reg = out.AsRegister<Register>();
8290 if (read_barrier_option == kWithReadBarrier) {
8291 CHECK(gUseReadBarrier);
8292 if (kUseBakerReadBarrier) {
8293 // Load with fast path based Baker's read barrier.
8294 // /* HeapReference<Object> */ out = *(out + offset)
8295 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8296 instruction, out, out_reg, offset, /* needs_null_check= */ false);
8297 } else {
8298 // Load with slow path based read barrier.
8299 // Save the value of `out` into `maybe_temp` before overwriting it
8300 // in the following move operation, as we will need it for the
8301 // read barrier below.
8302 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8303 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8304 // /* HeapReference<Object> */ out = *(out + offset)
8305 __ movl(out_reg, Address(out_reg, offset));
8306 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8307 }
8308 } else {
8309 // Plain load with no read barrier.
8310 // /* HeapReference<Object> */ out = *(out + offset)
8311 __ movl(out_reg, Address(out_reg, offset));
8312 __ MaybeUnpoisonHeapReference(out_reg);
8313 }
8314 }
8315
8316 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8317 HInstruction* instruction,
8318 Location out,
8319 Location obj,
8320 uint32_t offset,
8321 ReadBarrierOption read_barrier_option) {
8322 Register out_reg = out.AsRegister<Register>();
8323 Register obj_reg = obj.AsRegister<Register>();
8324 if (read_barrier_option == kWithReadBarrier) {
8325 CHECK(gUseReadBarrier);
8326 if (kUseBakerReadBarrier) {
8327 // Load with fast path based Baker's read barrier.
8328 // /* HeapReference<Object> */ out = *(obj + offset)
8329 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8330 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8331 } else {
8332 // Load with slow path based read barrier.
8333 // /* HeapReference<Object> */ out = *(obj + offset)
8334 __ movl(out_reg, Address(obj_reg, offset));
8335 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8336 }
8337 } else {
8338 // Plain load with no read barrier.
8339 // /* HeapReference<Object> */ out = *(obj + offset)
8340 __ movl(out_reg, Address(obj_reg, offset));
8341 __ MaybeUnpoisonHeapReference(out_reg);
8342 }
8343 }
8344
8345 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8346 HInstruction* instruction,
8347 Location root,
8348 const Address& address,
8349 Label* fixup_label,
8350 ReadBarrierOption read_barrier_option) {
8351 Register root_reg = root.AsRegister<Register>();
8352 if (read_barrier_option == kWithReadBarrier) {
8353 DCHECK(gUseReadBarrier);
8354 if (kUseBakerReadBarrier) {
8355 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8356 // Baker's read barriers are used:
8357 //
8358 // root = obj.field;
8359 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8360 // if (temp != null) {
8361 // root = temp(root)
8362 // }
8363
8364 // /* GcRoot<mirror::Object> */ root = *address
8365 __ movl(root_reg, address);
8366 if (fixup_label != nullptr) {
8367 __ Bind(fixup_label);
8368 }
8369 static_assert(
8370 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8371 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8372 "have different sizes.");
8373 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8374 "art::mirror::CompressedReference<mirror::Object> and int32_t "
8375 "have different sizes.");
8376
8377 // Slow path marking the GC root `root`.
8378 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8379 instruction, root, /* unpoison_ref_before_marking= */ false);
8380 codegen_->AddSlowPath(slow_path);
8381
8382 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8383 const int32_t entry_point_offset =
8384 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8385 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8386 // The entrypoint is null when the GC is not marking.
8387 __ j(kNotEqual, slow_path->GetEntryLabel());
8388 __ Bind(slow_path->GetExitLabel());
8389 } else {
8390 // GC root loaded through a slow path for read barriers other
8391 // than Baker's.
8392 // /* GcRoot<mirror::Object>* */ root = address
8393 __ leal(root_reg, address);
8394 if (fixup_label != nullptr) {
8395 __ Bind(fixup_label);
8396 }
8397 // /* mirror::Object* */ root = root->Read()
8398 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8399 }
8400 } else {
8401 // Plain GC root load with no read barrier.
8402 // /* GcRoot<mirror::Object> */ root = *address
8403 __ movl(root_reg, address);
8404 if (fixup_label != nullptr) {
8405 __ Bind(fixup_label);
8406 }
8407 // Note that GC roots are not affected by heap poisoning, thus we
8408 // do not have to unpoison `root_reg` here.
8409 }
8410 }
8411
8412 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8413 Location ref,
8414 Register obj,
8415 uint32_t offset,
8416 bool needs_null_check) {
8417 DCHECK(gUseReadBarrier);
8418 DCHECK(kUseBakerReadBarrier);
8419
8420 // /* HeapReference<Object> */ ref = *(obj + offset)
8421 Address src(obj, offset);
8422 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8423 }
8424
8425 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8426 Location ref,
8427 Register obj,
8428 uint32_t data_offset,
8429 Location index,
8430 bool needs_null_check) {
8431 DCHECK(gUseReadBarrier);
8432 DCHECK(kUseBakerReadBarrier);
8433
8434 static_assert(
8435 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8436 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8437 // /* HeapReference<Object> */ ref =
8438 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8439 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8440 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8441 }
8442
8443 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8444 Location ref,
8445 Register obj,
8446 const Address& src,
8447 bool needs_null_check,
8448 bool always_update_field,
8449 Register* temp) {
8450 DCHECK(gUseReadBarrier);
8451 DCHECK(kUseBakerReadBarrier);
8452
8453 // In slow path based read barriers, the read barrier call is
8454 // inserted after the original load. However, in fast path based
8455 // Baker's read barriers, we need to perform the load of
8456 // mirror::Object::monitor_ *before* the original reference load.
8457 // This load-load ordering is required by the read barrier.
8458 // The fast path/slow path (for Baker's algorithm) should look like:
8459 //
8460 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8461 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8462 // HeapReference<Object> ref = *src; // Original reference load.
8463 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8464 // if (is_gray) {
8465 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8466 // }
8467 //
8468 // Note: the original implementation in ReadBarrier::Barrier is
8469 // slightly more complex as:
8470 // - it implements the load-load fence using a data dependency on
8471 // the high-bits of rb_state, which are expected to be all zeroes
8472 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8473 // which is a no-op thanks to the x86 memory model);
8474 // - it performs additional checks that we do not do here for
8475 // performance reasons.
8476
8477 Register ref_reg = ref.AsRegister<Register>();
8478 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8479
8480 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8481 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8482 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8483 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8484 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8485 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8486
8487 // if (rb_state == ReadBarrier::GrayState())
8488 // ref = ReadBarrier::Mark(ref);
8489 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8490 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8491 if (needs_null_check) {
8492 MaybeRecordImplicitNullCheck(instruction);
8493 }
8494
8495 // Load fence to prevent load-load reordering.
8496 // Note that this is a no-op, thanks to the x86 memory model.
8497 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8498
8499 // The actual reference load.
8500 // /* HeapReference<Object> */ ref = *src
8501 __ movl(ref_reg, src); // Flags are unaffected.
8502
8503 // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
8504 // Slow path marking the object `ref` when it is gray.
8505 SlowPathCode* slow_path;
8506 if (always_update_field) {
8507 DCHECK(temp != nullptr);
8508 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8509 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8510 } else {
8511 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8512 instruction, ref, /* unpoison_ref_before_marking= */ true);
8513 }
8514 AddSlowPath(slow_path);
8515
8516 // We have done the "if" of the gray bit check above, now branch based on the flags.
8517 __ j(kNotZero, slow_path->GetEntryLabel());
8518
8519 // Object* ref = ref_addr->AsMirrorPtr()
8520 __ MaybeUnpoisonHeapReference(ref_reg);
8521
8522 __ Bind(slow_path->GetExitLabel());
8523 }
8524
8525 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8526 Location out,
8527 Location ref,
8528 Location obj,
8529 uint32_t offset,
8530 Location index) {
8531 DCHECK(gUseReadBarrier);
8532
8533 // Insert a slow path based read barrier *after* the reference load.
8534 //
8535 // If heap poisoning is enabled, the unpoisoning of the loaded
8536 // reference will be carried out by the runtime within the slow
8537 // path.
8538 //
8539 // Note that `ref` currently does not get unpoisoned (when heap
8540 // poisoning is enabled), which is alright as the `ref` argument is
8541 // not used by the artReadBarrierSlow entry point.
8542 //
8543 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8544 SlowPathCode* slow_path = new (GetScopedAllocator())
8545 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8546 AddSlowPath(slow_path);
8547
8548 __ jmp(slow_path->GetEntryLabel());
8549 __ Bind(slow_path->GetExitLabel());
8550 }
8551
8552 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8553 Location out,
8554 Location ref,
8555 Location obj,
8556 uint32_t offset,
8557 Location index) {
8558 if (gUseReadBarrier) {
8559 // Baker's read barriers shall be handled by the fast path
8560 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8561 DCHECK(!kUseBakerReadBarrier);
8562 // If heap poisoning is enabled, unpoisoning will be taken care of
8563 // by the runtime within the slow path.
8564 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8565 } else if (kPoisonHeapReferences) {
8566 __ UnpoisonHeapReference(out.AsRegister<Register>());
8567 }
8568 }
8569
8570 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8571 Location out,
8572 Location root) {
8573 DCHECK(gUseReadBarrier);
8574
8575 // Insert a slow path based read barrier *after* the GC root load.
8576 //
8577 // Note that GC roots are not affected by heap poisoning, so we do
8578 // not need to do anything special for this here.
8579 SlowPathCode* slow_path =
8580 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8581 AddSlowPath(slow_path);
8582
8583 __ jmp(slow_path->GetEntryLabel());
8584 __ Bind(slow_path->GetExitLabel());
8585 }
8586
8587 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8588 // Nothing to do, this should be removed during prepare for register allocator.
8589 LOG(FATAL) << "Unreachable";
8590 }
8591
8592 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8593 // Nothing to do, this should be removed during prepare for register allocator.
8594 LOG(FATAL) << "Unreachable";
8595 }
8596
8597 // Simple implementation of packed switch - generate cascaded compare/jumps.
8598 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8599 LocationSummary* locations =
8600 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8601 locations->SetInAt(0, Location::RequiresRegister());
8602 }
8603
8604 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8605 int32_t lower_bound,
8606 uint32_t num_entries,
8607 HBasicBlock* switch_block,
8608 HBasicBlock* default_block) {
8609 // Figure out the correct compare values and jump conditions.
8610 // Handle the first compare/branch as a special case because it might
8611 // jump to the default case.
8612 DCHECK_GT(num_entries, 2u);
8613 Condition first_condition;
8614 uint32_t index;
8615 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8616 if (lower_bound != 0) {
8617 first_condition = kLess;
8618 __ cmpl(value_reg, Immediate(lower_bound));
8619 __ j(first_condition, codegen_->GetLabelOf(default_block));
8620 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8621
8622 index = 1;
8623 } else {
8624 // Handle all the compare/jumps below.
8625 first_condition = kBelow;
8626 index = 0;
8627 }
8628
8629 // Handle the rest of the compare/jumps.
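  // Each cmpl below settles two consecutive cases: the first (less/below) jump catches case
  // `index`, and the equality jump catches case `index + 1`. With a zero lower bound, the
  // unsigned kBelow condition also routes negative inputs past every case to the default block.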
8630 for (; index + 1 < num_entries; index += 2) {
8631 int32_t compare_to_value = lower_bound + index + 1;
8632 __ cmpl(value_reg, Immediate(compare_to_value));
8633 // Jump to successors[index] if value < case_value[index].
8634 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8635 // Jump to successors[index + 1] if value == case_value[index + 1].
8636 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8637 }
8638
8639 if (index != num_entries) {
8640 // There are an odd number of entries. Handle the last one.
8641 DCHECK_EQ(index + 1, num_entries);
8642 __ cmpl(value_reg, Immediate(lower_bound + index));
8643 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8644 }
8645
8646 // And the default for any other value.
8647 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8648 __ jmp(codegen_->GetLabelOf(default_block));
8649 }
8650 }
8651
8652 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8653 int32_t lower_bound = switch_instr->GetStartValue();
8654 uint32_t num_entries = switch_instr->GetNumEntries();
8655 LocationSummary* locations = switch_instr->GetLocations();
8656 Register value_reg = locations->InAt(0).AsRegister<Register>();
8657
8658 GenPackedSwitchWithCompares(value_reg,
8659 lower_bound,
8660 num_entries,
8661 switch_instr->GetBlock(),
8662 switch_instr->GetDefaultBlock());
8663 }
8664
8665 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8666 LocationSummary* locations =
8667 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8668 locations->SetInAt(0, Location::RequiresRegister());
8669
8670 // Constant area pointer.
8671 locations->SetInAt(1, Location::RequiresRegister());
8672
8673 // And the temporary we need.
8674 locations->AddTemp(Location::RequiresRegister());
8675 }
8676
8677 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8678 int32_t lower_bound = switch_instr->GetStartValue();
8679 uint32_t num_entries = switch_instr->GetNumEntries();
8680 LocationSummary* locations = switch_instr->GetLocations();
8681 Register value_reg = locations->InAt(0).AsRegister<Register>();
8682 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8683
8684 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8685 GenPackedSwitchWithCompares(value_reg,
8686 lower_bound,
8687 num_entries,
8688 switch_instr->GetBlock(),
8689 default_block);
8690 return;
8691 }
8692
8693 // Above the threshold, use a jump table stored in the constant area.
8694 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8695 Register constant_area = locations->InAt(1).AsRegister<Register>();
8696
8697 // Remove the bias, if needed.
8698 if (lower_bound != 0) {
8699 __ leal(temp_reg, Address(value_reg, -lower_bound));
8700 value_reg = temp_reg;
8701 }
8702
8703 // Is the value in range?
8704 DCHECK_GE(num_entries, 1u);
8705 __ cmpl(value_reg, Immediate(num_entries - 1));
8706 __ j(kAbove, codegen_->GetLabelOf(default_block));
8707
8708 // We are in the range of the table.
8709 // Load (target-constant_area) from the jump table, indexing by the value.
8710 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8711
8712 // Compute the actual target address by adding in constant_area.
8713 __ addl(temp_reg, constant_area);
8714
8715 // And jump.
8716 __ jmp(temp_reg);
8717 }
8718
8719 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8720 HX86ComputeBaseMethodAddress* insn) {
8721 LocationSummary* locations =
8722 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8723 locations->SetOut(Location::RequiresRegister());
8724 }
8725
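// Note: 32-bit x86 has no EIP-relative addressing, so the base method address is materialized
// with the classic call/pop sequence: call the next instruction (pushing the return address)
// and pop that address into the destination register. Constant area accesses are later encoded
// as displacements from this recorded point.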
8726 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8727 HX86ComputeBaseMethodAddress* insn) {
8728 LocationSummary* locations = insn->GetLocations();
8729 Register reg = locations->Out().AsRegister<Register>();
8730
8731 // Generate call to next instruction.
8732 Label next_instruction;
8733 __ call(&next_instruction);
8734 __ Bind(&next_instruction);
8735
8736 // Remember this offset for later use with the constant area.
8737 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8738
8739 // Grab the return address off the stack.
8740 __ popl(reg);
8741 }
8742
8743 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8744 HX86LoadFromConstantTable* insn) {
8745 LocationSummary* locations =
8746 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8747
8748 locations->SetInAt(0, Location::RequiresRegister());
8749 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8750
8751 // If the constant does not need to be materialized, only the inputs need to be set.
8752 if (insn->IsEmittedAtUseSite()) {
8753 return;
8754 }
8755
8756 switch (insn->GetType()) {
8757 case DataType::Type::kFloat32:
8758 case DataType::Type::kFloat64:
8759 locations->SetOut(Location::RequiresFpuRegister());
8760 break;
8761
8762 case DataType::Type::kInt32:
8763 locations->SetOut(Location::RequiresRegister());
8764 break;
8765
8766 default:
8767 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8768 }
8769 }
8770
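// Note: when the constant is emitted at its use site, the consuming instruction addresses the
// constant area directly, so no code is generated here (hence the early return below).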
8771 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8772 if (insn->IsEmittedAtUseSite()) {
8773 return;
8774 }
8775
8776 LocationSummary* locations = insn->GetLocations();
8777 Location out = locations->Out();
8778 Register const_area = locations->InAt(0).AsRegister<Register>();
8779 HConstant *value = insn->GetConstant();
8780
8781 switch (insn->GetType()) {
8782 case DataType::Type::kFloat32:
8783 __ movss(out.AsFpuRegister<XmmRegister>(),
8784 codegen_->LiteralFloatAddress(
8785 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8786 break;
8787
8788 case DataType::Type::kFloat64:
8789 __ movsd(out.AsFpuRegister<XmmRegister>(),
8790 codegen_->LiteralDoubleAddress(
8791 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8792 break;
8793
8794 case DataType::Type::kInt32:
8795 __ movl(out.AsRegister<Register>(),
8796 codegen_->LiteralInt32Address(
8797 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8798 break;
8799
8800 default:
8801 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8802 }
8803 }
8804
8805 /**
8806 * Class to handle late fixup of offsets into the constant area.
8807 */
8808 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8809 public:
8810 RIPFixup(CodeGeneratorX86& codegen,
8811 HX86ComputeBaseMethodAddress* base_method_address,
8812 size_t offset)
8813 : codegen_(&codegen),
8814 base_method_address_(base_method_address),
8815 offset_into_constant_area_(offset) {}
8816
8817 protected:
8818 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8819
8820 CodeGeneratorX86* codegen_;
8821 HX86ComputeBaseMethodAddress* base_method_address_;
8822
8823 private:
8824 void Process(const MemoryRegion& region, int pos) override {
8825 // Patch the correct offset for the instruction. The place to patch is the
8826 // last 4 bytes of the instruction.
8827 // The value to patch is the distance from the address computed by the
8828 // HX86ComputeBaseMethodAddress instruction to the target offset in the constant area.
8829 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8830 int32_t relative_position =
8831 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8832
8833 // Patch in the right value.
8834 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8835 }
8836
8837 // Location in constant area that the fixup refers to.
8838 int32_t offset_into_constant_area_;
8839 };
8840
8841 /**
8842 * Class to handle late fixup of offsets to a jump table that will be created in the
8843 * constant area.
8844 */
8845 class JumpTableRIPFixup : public RIPFixup {
8846 public:
8847 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8848 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8849 switch_instr_(switch_instr) {}
8850
8851 void CreateJumpTable() {
8852 X86Assembler* assembler = codegen_->GetAssembler();
8853
8854 // Ensure that the reference to the jump table has the correct offset.
8855 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8856 SetOffset(offset_in_constant_table);
8857
8858 // The label values in the jump table are computed relative to the
8859 // instruction addressing the constant area.
8860 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8861
8862 // Populate the jump table with the correct target offsets.
8863 int32_t num_entries = switch_instr_->GetNumEntries();
8864 HBasicBlock* block = switch_instr_->GetBlock();
8865 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8866 // The value that we want is the target offset - the position of the table.
8867 for (int32_t i = 0; i < num_entries; i++) {
8868 HBasicBlock* b = successors[i];
8869 Label* l = codegen_->GetLabelOf(b);
8870 DCHECK(l->IsBound());
8871 int32_t offset_to_block = l->Position() - relative_offset;
8872 assembler->AppendInt32(offset_to_block);
8873 }
8874 }
8875
8876 private:
8877 const HX86PackedSwitch* switch_instr_;
8878 };
8879
8880 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8881 // Generate the constant area if needed.
8882 X86Assembler* assembler = GetAssembler();
8883
8884 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8885 // Align to a 4-byte boundary to reduce cache misses, as the data consists of 4- and
8886 // 8-byte values.
8887 assembler->Align(4, 0);
8888 constant_area_start_ = assembler->CodeSize();
8889
8890 // Populate any jump tables.
8891 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8892 jump_table->CreateJumpTable();
8893 }
8894
8895 // And now add the constant area to the generated code.
8896 assembler->AddConstantArea();
8897 }
8898
8899 // And finish up.
8900 CodeGenerator::Finalize(allocator);
8901 }
8902
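// Note: the Literal*Address helpers below append the value to the assembler's constant area
// and return an Address with a 32-bit placeholder displacement; the attached RIPFixup later
// patches in the actual distance from the HX86ComputeBaseMethodAddress point to the constant.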
8903 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8904 HX86ComputeBaseMethodAddress* method_base,
8905 Register reg) {
8906 AssemblerFixup* fixup =
8907 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8908 return Address(reg, kPlaceholder32BitOffset, fixup);
8909 }
8910
8911 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8912 HX86ComputeBaseMethodAddress* method_base,
8913 Register reg) {
8914 AssemblerFixup* fixup =
8915 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8916 return Address(reg, kPlaceholder32BitOffset, fixup);
8917 }
8918
8919 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8920 HX86ComputeBaseMethodAddress* method_base,
8921 Register reg) {
8922 AssemblerFixup* fixup =
8923 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8924 return Address(reg, kPlaceholder32BitOffset, fixup);
8925 }
8926
8927 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8928 HX86ComputeBaseMethodAddress* method_base,
8929 Register reg) {
8930 AssemblerFixup* fixup =
8931 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8932 return Address(reg, kPlaceholder32BitOffset, fixup);
8933 }
8934
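// Note: for the zero case, xorl reg, reg and testl reg, reg are preferred over movl reg, 0
// and cmpl reg, 0 because they avoid the 32-bit immediate and have shorter encodings.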
8935 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8936 if (value == 0) {
8937 __ xorl(dest, dest);
8938 } else {
8939 __ movl(dest, Immediate(value));
8940 }
8941 }
8942
8943 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8944 if (value == 0) {
8945 __ testl(dest, dest);
8946 } else {
8947 __ cmpl(dest, Immediate(value));
8948 }
8949 }
8950
8951 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8952 Register lhs_reg = lhs.AsRegister<Register>();
8953 GenerateIntCompare(lhs_reg, rhs);
8954 }
8955
8956 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8957 if (rhs.IsConstant()) {
8958 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8959 Compare32BitValue(lhs, value);
8960 } else if (rhs.IsStackSlot()) {
8961 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8962 } else {
8963 __ cmpl(lhs, rhs.AsRegister<Register>());
8964 }
8965 }
8966
8967 Address CodeGeneratorX86::ArrayAddress(Register obj,
8968 Location index,
8969 ScaleFactor scale,
8970 uint32_t data_offset) {
8971 return index.IsConstant() ?
8972 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8973 Address(obj, index.AsRegister<Register>(), scale, data_offset);
8974 }
8975
8976 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8977 Register reg,
8978 Register value) {
8979 // Create a fixup to be used to create and address the jump table.
8980 JumpTableRIPFixup* table_fixup =
8981 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8982
8983 // We have to populate the jump tables.
8984 fixups_to_jump_tables_.push_back(table_fixup);
8985
8986 // We want a scaled address, as we are extracting the correct offset from the table.
8987 return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
8988 }
8989
8990 // TODO: target as memory.
8991 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8992 if (!target.IsValid()) {
8993 DCHECK_EQ(type, DataType::Type::kVoid);
8994 return;
8995 }
8996
8997 DCHECK_NE(type, DataType::Type::kVoid);
8998
8999 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9000 if (target.Equals(return_loc)) {
9001 return;
9002 }
9003
9004 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9005 // with the else branch.
9006 if (type == DataType::Type::kInt64) {
9007 HParallelMove parallel_move(GetGraph()->GetAllocator());
9008 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9009 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9010 GetMoveResolver()->EmitNativeCode(&parallel_move);
9011 } else {
9012 // Let the parallel move resolver take care of all of this.
9013 HParallelMove parallel_move(GetGraph()->GetAllocator());
9014 parallel_move.AddMove(return_loc, target, type, nullptr);
9015 GetMoveResolver()->EmitNativeCode(&parallel_move);
9016 }
9017 }
9018
9019 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9020 const uint8_t* roots_data,
9021 const PatchInfo<Label>& info,
9022 uint64_t index_in_table) const {
9023 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9024 uintptr_t address =
9025 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9026 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9027 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9028 dchecked_integral_cast<uint32_t>(address);
9029 }
9030
9031 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9032 for (const PatchInfo<Label>& info : jit_string_patches_) {
9033 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9034 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9035 PatchJitRootUse(code, roots_data, info, index_in_table);
9036 }
9037
9038 for (const PatchInfo<Label>& info : jit_class_patches_) {
9039 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9040 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9041 PatchJitRootUse(code, roots_data, info, index_in_table);
9042 }
9043 }
9044
9045 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
9046 ATTRIBUTE_UNUSED) {
9047 LOG(FATAL) << "Unreachable";
9048 }
9049
9050 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
9051 ATTRIBUTE_UNUSED) {
9052 LOG(FATAL) << "Unreachable";
9053 }
9054
9055 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9056 return codegen_->GetInstructionSetFeatures().HasAVX();
9057 }
9058 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9059 return codegen_->GetInstructionSetFeatures().HasAVX2();
9060 }
9061 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9062 return codegen_->GetInstructionSetFeatures().HasAVX();
9063 }
9064 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9065 return codegen_->GetInstructionSetFeatures().HasAVX2();
9066 }
9067
9068 #undef __
9069
9070 } // namespace x86
9071 } // namespace art
9072