1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "art_method-inl.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_x86.h"
30 #include "jit/profiling_info.h"
31 #include "linker/linker_patch.h"
32 #include "lock_word.h"
33 #include "mirror/array-inl.h"
34 #include "mirror/class-inl.h"
35 #include "scoped_thread_state_change-inl.h"
36 #include "thread.h"
37 #include "utils/assembler.h"
38 #include "utils/stack_checks.h"
39 #include "utils/x86/assembler_x86.h"
40 #include "utils/x86/managed_register_x86.h"
41
42 namespace art {
43
44 template<class MirrorType>
45 class GcRoot;
46
47 namespace x86 {
48
// Stack offset of the current method slot (offset 0).
// NOTE(review): presumably relative to the stack pointer of the compiled frame -- confirm at
// the use sites.
static constexpr int kCurrentMethodStackOffset = 0;
// Register designated as the method argument.
static constexpr Register kMethodRegisterArgument = EAX;
// Core registers listed as callee-saves for this code generator.
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

// Mask selecting bit 10 (0x400).
// NOTE(review): by its name this is the C2 condition flag, presumably of the x87 FPU status
// word -- confirm at the use sites.
static constexpr int kC2ConditionMask = 0x400;

// Register number 8 used as a stand-in for a return register.
// NOTE(review): presumably lies outside the range of real core registers (hence "fake") --
// confirm against the Register enum.
static constexpr int kFakeReturnRegister = Register(8);

// Raw bit patterns of a quiet NaN in the IEEE 754 double and single precision formats.
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
59
OneRegInReferenceOutSaveEverythingCallerSaves()60 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
61 InvokeRuntimeCallingConvention calling_convention;
62 RegisterSet caller_saves = RegisterSet::Empty();
63 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
64 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
65 // that the the kPrimNot result register is the same as the first argument register.
66 return caller_saves;
67 }
68
// `__` is shorthand for "emit through the X86Assembler of the `codegen` variable in scope":
// `__ insn(...)` expands to a call of `insn` on that assembler.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
// Int32 value of the offset that QUICK_ENTRYPOINT_OFFSET yields for entrypoint `x` with
// kX86PointerSize.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
72
73 class NullCheckSlowPathX86 : public SlowPathCode {
74 public:
NullCheckSlowPathX86(HNullCheck * instruction)75 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
76
EmitNativeCode(CodeGenerator * codegen)77 void EmitNativeCode(CodeGenerator* codegen) override {
78 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
79 __ Bind(GetEntryLabel());
80 if (instruction_->CanThrowIntoCatchBlock()) {
81 // Live registers will be restored in the catch block if caught.
82 SaveLiveRegisters(codegen, instruction_->GetLocations());
83 }
84 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
85 instruction_,
86 instruction_->GetDexPc(),
87 this);
88 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
89 }
90
IsFatal() const91 bool IsFatal() const override { return true; }
92
GetDescription() const93 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
94
95 private:
96 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
97 };
98
99 class DivZeroCheckSlowPathX86 : public SlowPathCode {
100 public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)101 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
102
EmitNativeCode(CodeGenerator * codegen)103 void EmitNativeCode(CodeGenerator* codegen) override {
104 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
105 __ Bind(GetEntryLabel());
106 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
107 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
108 }
109
IsFatal() const110 bool IsFatal() const override { return true; }
111
GetDescription() const112 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
113
114 private:
115 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
116 };
117
118 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
119 public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)120 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
121 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
122
EmitNativeCode(CodeGenerator * codegen)123 void EmitNativeCode(CodeGenerator* codegen) override {
124 __ Bind(GetEntryLabel());
125 if (is_div_) {
126 __ negl(reg_);
127 } else {
128 __ movl(reg_, Immediate(0));
129 }
130 __ jmp(GetExitLabel());
131 }
132
GetDescription() const133 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
134
135 private:
136 Register reg_;
137 bool is_div_;
138 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
139 };
140
141 class BoundsCheckSlowPathX86 : public SlowPathCode {
142 public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)143 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
144
EmitNativeCode(CodeGenerator * codegen)145 void EmitNativeCode(CodeGenerator* codegen) override {
146 LocationSummary* locations = instruction_->GetLocations();
147 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
148 __ Bind(GetEntryLabel());
149 // We're moving two locations to locations that could overlap, so we need a parallel
150 // move resolver.
151 if (instruction_->CanThrowIntoCatchBlock()) {
152 // Live registers will be restored in the catch block if caught.
153 SaveLiveRegisters(codegen, instruction_->GetLocations());
154 }
155
156 // Are we using an array length from memory?
157 HInstruction* array_length = instruction_->InputAt(1);
158 Location length_loc = locations->InAt(1);
159 InvokeRuntimeCallingConvention calling_convention;
160 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
161 // Load the array length into our temporary.
162 HArrayLength* length = array_length->AsArrayLength();
163 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
164 Location array_loc = array_length->GetLocations()->InAt(0);
165 Address array_len(array_loc.AsRegister<Register>(), len_offset);
166 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
167 // Check for conflicts with index.
168 if (length_loc.Equals(locations->InAt(0))) {
169 // We know we aren't using parameter 2.
170 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
171 }
172 __ movl(length_loc.AsRegister<Register>(), array_len);
173 if (mirror::kUseStringCompression && length->IsStringLength()) {
174 __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
175 }
176 }
177 x86_codegen->EmitParallelMoves(
178 locations->InAt(0),
179 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
180 DataType::Type::kInt32,
181 length_loc,
182 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
183 DataType::Type::kInt32);
184 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
185 ? kQuickThrowStringBounds
186 : kQuickThrowArrayBounds;
187 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
188 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
189 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
190 }
191
IsFatal() const192 bool IsFatal() const override { return true; }
193
GetDescription() const194 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
195
196 private:
197 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
198 };
199
200 class SuspendCheckSlowPathX86 : public SlowPathCode {
201 public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)202 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
203 : SlowPathCode(instruction), successor_(successor) {}
204
EmitNativeCode(CodeGenerator * codegen)205 void EmitNativeCode(CodeGenerator* codegen) override {
206 LocationSummary* locations = instruction_->GetLocations();
207 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
208 __ Bind(GetEntryLabel());
209 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
210 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
211 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
212 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
213 if (successor_ == nullptr) {
214 __ jmp(GetReturnLabel());
215 } else {
216 __ jmp(x86_codegen->GetLabelOf(successor_));
217 }
218 }
219
GetReturnLabel()220 Label* GetReturnLabel() {
221 DCHECK(successor_ == nullptr);
222 return &return_label_;
223 }
224
GetSuccessor() const225 HBasicBlock* GetSuccessor() const {
226 return successor_;
227 }
228
GetDescription() const229 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
230
231 private:
232 HBasicBlock* const successor_;
233 Label return_label_;
234
235 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
236 };
237
238 class LoadStringSlowPathX86 : public SlowPathCode {
239 public:
LoadStringSlowPathX86(HLoadString * instruction)240 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
241
EmitNativeCode(CodeGenerator * codegen)242 void EmitNativeCode(CodeGenerator* codegen) override {
243 LocationSummary* locations = instruction_->GetLocations();
244 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
245
246 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
247 __ Bind(GetEntryLabel());
248 SaveLiveRegisters(codegen, locations);
249
250 InvokeRuntimeCallingConvention calling_convention;
251 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
252 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
253 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
254 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
255 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
256 RestoreLiveRegisters(codegen, locations);
257
258 __ jmp(GetExitLabel());
259 }
260
GetDescription() const261 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
262
263 private:
264 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
265 };
266
267 class LoadClassSlowPathX86 : public SlowPathCode {
268 public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at)269 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
270 : SlowPathCode(at), cls_(cls) {
271 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
272 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
273 }
274
EmitNativeCode(CodeGenerator * codegen)275 void EmitNativeCode(CodeGenerator* codegen) override {
276 LocationSummary* locations = instruction_->GetLocations();
277 Location out = locations->Out();
278 const uint32_t dex_pc = instruction_->GetDexPc();
279 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
280 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
281
282 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
283 __ Bind(GetEntryLabel());
284 SaveLiveRegisters(codegen, locations);
285
286 InvokeRuntimeCallingConvention calling_convention;
287 if (must_resolve_type) {
288 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
289 dex::TypeIndex type_index = cls_->GetTypeIndex();
290 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
291 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
292 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
293 // If we also must_do_clinit, the resolved type is now in the correct register.
294 } else {
295 DCHECK(must_do_clinit);
296 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
297 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
298 }
299 if (must_do_clinit) {
300 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
301 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
302 }
303
304 // Move the class to the desired location.
305 if (out.IsValid()) {
306 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
307 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
308 }
309 RestoreLiveRegisters(codegen, locations);
310 __ jmp(GetExitLabel());
311 }
312
GetDescription() const313 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
314
315 private:
316 // The class this slow path will load.
317 HLoadClass* const cls_;
318
319 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
320 };
321
322 class TypeCheckSlowPathX86 : public SlowPathCode {
323 public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)324 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
325 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
326
EmitNativeCode(CodeGenerator * codegen)327 void EmitNativeCode(CodeGenerator* codegen) override {
328 LocationSummary* locations = instruction_->GetLocations();
329 DCHECK(instruction_->IsCheckCast()
330 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
331
332 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
333 __ Bind(GetEntryLabel());
334
335 if (kPoisonHeapReferences &&
336 instruction_->IsCheckCast() &&
337 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
338 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
339 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
340 }
341
342 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
343 SaveLiveRegisters(codegen, locations);
344 }
345
346 // We're moving two locations to locations that could overlap, so we need a parallel
347 // move resolver.
348 InvokeRuntimeCallingConvention calling_convention;
349 x86_codegen->EmitParallelMoves(locations->InAt(0),
350 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
351 DataType::Type::kReference,
352 locations->InAt(1),
353 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
354 DataType::Type::kReference);
355 if (instruction_->IsInstanceOf()) {
356 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
357 instruction_,
358 instruction_->GetDexPc(),
359 this);
360 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
361 } else {
362 DCHECK(instruction_->IsCheckCast());
363 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
364 instruction_,
365 instruction_->GetDexPc(),
366 this);
367 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
368 }
369
370 if (!is_fatal_) {
371 if (instruction_->IsInstanceOf()) {
372 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
373 }
374 RestoreLiveRegisters(codegen, locations);
375
376 __ jmp(GetExitLabel());
377 }
378 }
379
GetDescription() const380 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
IsFatal() const381 bool IsFatal() const override { return is_fatal_; }
382
383 private:
384 const bool is_fatal_;
385
386 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
387 };
388
389 class DeoptimizationSlowPathX86 : public SlowPathCode {
390 public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)391 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
392 : SlowPathCode(instruction) {}
393
EmitNativeCode(CodeGenerator * codegen)394 void EmitNativeCode(CodeGenerator* codegen) override {
395 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
396 __ Bind(GetEntryLabel());
397 LocationSummary* locations = instruction_->GetLocations();
398 SaveLiveRegisters(codegen, locations);
399 InvokeRuntimeCallingConvention calling_convention;
400 x86_codegen->Load32BitValue(
401 calling_convention.GetRegisterAt(0),
402 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
403 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
404 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
405 }
406
GetDescription() const407 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
408
409 private:
410 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
411 };
412
413 class ArraySetSlowPathX86 : public SlowPathCode {
414 public:
ArraySetSlowPathX86(HInstruction * instruction)415 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
416
EmitNativeCode(CodeGenerator * codegen)417 void EmitNativeCode(CodeGenerator* codegen) override {
418 LocationSummary* locations = instruction_->GetLocations();
419 __ Bind(GetEntryLabel());
420 SaveLiveRegisters(codegen, locations);
421
422 InvokeRuntimeCallingConvention calling_convention;
423 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
424 parallel_move.AddMove(
425 locations->InAt(0),
426 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
427 DataType::Type::kReference,
428 nullptr);
429 parallel_move.AddMove(
430 locations->InAt(1),
431 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
432 DataType::Type::kInt32,
433 nullptr);
434 parallel_move.AddMove(
435 locations->InAt(2),
436 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
437 DataType::Type::kReference,
438 nullptr);
439 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
440
441 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
442 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
443 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
444 RestoreLiveRegisters(codegen, locations);
445 __ jmp(GetExitLabel());
446 }
447
GetDescription() const448 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
449
450 private:
451 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
452 };
453
454 // Slow path marking an object reference `ref` during a read
455 // barrier. The field `obj.field` in the object `obj` holding this
456 // reference does not get updated by this slow path after marking (see
457 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
458 //
459 // This means that after the execution of this slow path, `ref` will
460 // always be up-to-date, but `obj.field` may not; i.e., after the
461 // flip, `ref` will be a to-space reference, but `obj.field` will
462 // probably still be a from-space reference (unless it gets updated by
463 // another thread, or if another thread installed another object
464 // reference (different from `ref`) in `obj.field`).
465 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
466 public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)467 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
468 Location ref,
469 bool unpoison_ref_before_marking)
470 : SlowPathCode(instruction),
471 ref_(ref),
472 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
473 DCHECK(kEmitCompilerReadBarrier);
474 }
475
GetDescription() const476 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
477
EmitNativeCode(CodeGenerator * codegen)478 void EmitNativeCode(CodeGenerator* codegen) override {
479 LocationSummary* locations = instruction_->GetLocations();
480 Register ref_reg = ref_.AsRegister<Register>();
481 DCHECK(locations->CanCall());
482 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
483 DCHECK(instruction_->IsInstanceFieldGet() ||
484 instruction_->IsStaticFieldGet() ||
485 instruction_->IsArrayGet() ||
486 instruction_->IsArraySet() ||
487 instruction_->IsLoadClass() ||
488 instruction_->IsLoadString() ||
489 instruction_->IsInstanceOf() ||
490 instruction_->IsCheckCast() ||
491 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
492 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
493 << "Unexpected instruction in read barrier marking slow path: "
494 << instruction_->DebugName();
495
496 __ Bind(GetEntryLabel());
497 if (unpoison_ref_before_marking_) {
498 // Object* ref = ref_addr->AsMirrorPtr()
499 __ MaybeUnpoisonHeapReference(ref_reg);
500 }
501 // No need to save live registers; it's taken care of by the
502 // entrypoint. Also, there is no need to update the stack mask,
503 // as this runtime call will not trigger a garbage collection.
504 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
505 DCHECK_NE(ref_reg, ESP);
506 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
507 // "Compact" slow path, saving two moves.
508 //
509 // Instead of using the standard runtime calling convention (input
510 // and output in EAX):
511 //
512 // EAX <- ref
513 // EAX <- ReadBarrierMark(EAX)
514 // ref <- EAX
515 //
516 // we just use rX (the register containing `ref`) as input and output
517 // of a dedicated entrypoint:
518 //
519 // rX <- ReadBarrierMarkRegX(rX)
520 //
521 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
522 // This runtime call does not require a stack map.
523 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
524 __ jmp(GetExitLabel());
525 }
526
527 private:
528 // The location (register) of the marked object reference.
529 const Location ref_;
530 // Should the reference in `ref_` be unpoisoned prior to marking it?
531 const bool unpoison_ref_before_marking_;
532
533 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
534 };
535
536 // Slow path marking an object reference `ref` during a read barrier,
537 // and if needed, atomically updating the field `obj.field` in the
538 // object `obj` holding this reference after marking (contrary to
539 // ReadBarrierMarkSlowPathX86 above, which never tries to update
540 // `obj.field`).
541 //
542 // This means that after the execution of this slow path, both `ref`
543 // and `obj.field` will be up-to-date; i.e., after the flip, both will
544 // hold the same to-space reference (unless another thread installed
545 // another object reference (different from `ref`) in `obj.field`).
546 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
547 public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)548 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
549 Location ref,
550 Register obj,
551 const Address& field_addr,
552 bool unpoison_ref_before_marking,
553 Register temp)
554 : SlowPathCode(instruction),
555 ref_(ref),
556 obj_(obj),
557 field_addr_(field_addr),
558 unpoison_ref_before_marking_(unpoison_ref_before_marking),
559 temp_(temp) {
560 DCHECK(kEmitCompilerReadBarrier);
561 }
562
GetDescription() const563 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
564
EmitNativeCode(CodeGenerator * codegen)565 void EmitNativeCode(CodeGenerator* codegen) override {
566 LocationSummary* locations = instruction_->GetLocations();
567 Register ref_reg = ref_.AsRegister<Register>();
568 DCHECK(locations->CanCall());
569 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
570 // This slow path is only used by the UnsafeCASObject intrinsic.
571 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
572 << "Unexpected instruction in read barrier marking and field updating slow path: "
573 << instruction_->DebugName();
574 DCHECK(instruction_->GetLocations()->Intrinsified());
575 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
576
577 __ Bind(GetEntryLabel());
578 if (unpoison_ref_before_marking_) {
579 // Object* ref = ref_addr->AsMirrorPtr()
580 __ MaybeUnpoisonHeapReference(ref_reg);
581 }
582
583 // Save the old (unpoisoned) reference.
584 __ movl(temp_, ref_reg);
585
586 // No need to save live registers; it's taken care of by the
587 // entrypoint. Also, there is no need to update the stack mask,
588 // as this runtime call will not trigger a garbage collection.
589 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
590 DCHECK_NE(ref_reg, ESP);
591 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
592 // "Compact" slow path, saving two moves.
593 //
594 // Instead of using the standard runtime calling convention (input
595 // and output in EAX):
596 //
597 // EAX <- ref
598 // EAX <- ReadBarrierMark(EAX)
599 // ref <- EAX
600 //
601 // we just use rX (the register containing `ref`) as input and output
602 // of a dedicated entrypoint:
603 //
604 // rX <- ReadBarrierMarkRegX(rX)
605 //
606 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
607 // This runtime call does not require a stack map.
608 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
609
610 // If the new reference is different from the old reference,
611 // update the field in the holder (`*field_addr`).
612 //
613 // Note that this field could also hold a different object, if
614 // another thread had concurrently changed it. In that case, the
615 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
616 // operation below would abort the CAS, leaving the field as-is.
617 NearLabel done;
618 __ cmpl(temp_, ref_reg);
619 __ j(kEqual, &done);
620
621 // Update the the holder's field atomically. This may fail if
622 // mutator updates before us, but it's OK. This is achieved
623 // using a strong compare-and-set (CAS) operation with relaxed
624 // memory synchronization ordering, where the expected value is
625 // the old reference and the desired value is the new reference.
626 // This operation is implemented with a 32-bit LOCK CMPXLCHG
627 // instruction, which requires the expected value (the old
628 // reference) to be in EAX. Save EAX beforehand, and move the
629 // expected value (stored in `temp_`) into EAX.
630 __ pushl(EAX);
631 __ movl(EAX, temp_);
632
633 // Convenience aliases.
634 Register base = obj_;
635 Register expected = EAX;
636 Register value = ref_reg;
637
638 bool base_equals_value = (base == value);
639 if (kPoisonHeapReferences) {
640 if (base_equals_value) {
641 // If `base` and `value` are the same register location, move
642 // `value` to a temporary register. This way, poisoning
643 // `value` won't invalidate `base`.
644 value = temp_;
645 __ movl(value, base);
646 }
647
648 // Check that the register allocator did not assign the location
649 // of `expected` (EAX) to `value` nor to `base`, so that heap
650 // poisoning (when enabled) works as intended below.
651 // - If `value` were equal to `expected`, both references would
652 // be poisoned twice, meaning they would not be poisoned at
653 // all, as heap poisoning uses address negation.
654 // - If `base` were equal to `expected`, poisoning `expected`
655 // would invalidate `base`.
656 DCHECK_NE(value, expected);
657 DCHECK_NE(base, expected);
658
659 __ PoisonHeapReference(expected);
660 __ PoisonHeapReference(value);
661 }
662
663 __ LockCmpxchgl(field_addr_, value);
664
665 // If heap poisoning is enabled, we need to unpoison the values
666 // that were poisoned earlier.
667 if (kPoisonHeapReferences) {
668 if (base_equals_value) {
669 // `value` has been moved to a temporary register, no need
670 // to unpoison it.
671 } else {
672 __ UnpoisonHeapReference(value);
673 }
674 // No need to unpoison `expected` (EAX), as it is be overwritten below.
675 }
676
677 // Restore EAX.
678 __ popl(EAX);
679
680 __ Bind(&done);
681 __ jmp(GetExitLabel());
682 }
683
684 private:
685 // The location (register) of the marked object reference.
686 const Location ref_;
687 // The register containing the object holding the marked object reference field.
688 const Register obj_;
689 // The address of the marked reference field. The base of this address must be `obj_`.
690 const Address field_addr_;
691
692 // Should the reference in `ref_` be unpoisoned prior to marking it?
693 const bool unpoison_ref_before_marking_;
694
695 const Register temp_;
696
697 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
698 };
699
700 // Slow path generating a read barrier for a heap reference.
701 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
702 public:
ReadBarrierForHeapReferenceSlowPathX86(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)703 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
704 Location out,
705 Location ref,
706 Location obj,
707 uint32_t offset,
708 Location index)
709 : SlowPathCode(instruction),
710 out_(out),
711 ref_(ref),
712 obj_(obj),
713 offset_(offset),
714 index_(index) {
715 DCHECK(kEmitCompilerReadBarrier);
716 // If `obj` is equal to `out` or `ref`, it means the initial object
717 // has been overwritten by (or after) the heap object reference load
718 // to be instrumented, e.g.:
719 //
720 // __ movl(out, Address(out, offset));
721 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
722 //
723 // In that case, we have lost the information about the original
724 // object, and the emitted read barrier cannot work properly.
725 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
726 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
727 }
728
EmitNativeCode(CodeGenerator * codegen)729 void EmitNativeCode(CodeGenerator* codegen) override {
730 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
731 LocationSummary* locations = instruction_->GetLocations();
732 Register reg_out = out_.AsRegister<Register>();
733 DCHECK(locations->CanCall());
734 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
735 DCHECK(instruction_->IsInstanceFieldGet() ||
736 instruction_->IsStaticFieldGet() ||
737 instruction_->IsArrayGet() ||
738 instruction_->IsInstanceOf() ||
739 instruction_->IsCheckCast() ||
740 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
741 << "Unexpected instruction in read barrier for heap reference slow path: "
742 << instruction_->DebugName();
743
744 __ Bind(GetEntryLabel());
745 SaveLiveRegisters(codegen, locations);
746
747 // We may have to change the index's value, but as `index_` is a
748 // constant member (like other "inputs" of this slow path),
749 // introduce a copy of it, `index`.
750 Location index = index_;
751 if (index_.IsValid()) {
752 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
753 if (instruction_->IsArrayGet()) {
754 // Compute the actual memory offset and store it in `index`.
755 Register index_reg = index_.AsRegister<Register>();
756 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
757 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
758 // We are about to change the value of `index_reg` (see the
759 // calls to art::x86::X86Assembler::shll and
760 // art::x86::X86Assembler::AddImmediate below), but it has
761 // not been saved by the previous call to
762 // art::SlowPathCode::SaveLiveRegisters, as it is a
763 // callee-save register --
764 // art::SlowPathCode::SaveLiveRegisters does not consider
765 // callee-save registers, as it has been designed with the
766 // assumption that callee-save registers are supposed to be
767 // handled by the called function. So, as a callee-save
768 // register, `index_reg` _would_ eventually be saved onto
769 // the stack, but it would be too late: we would have
770 // changed its value earlier. Therefore, we manually save
771 // it here into another freely available register,
772 // `free_reg`, chosen of course among the caller-save
773 // registers (as a callee-save `free_reg` register would
774 // exhibit the same problem).
775 //
776 // Note we could have requested a temporary register from
777 // the register allocator instead; but we prefer not to, as
778 // this is a slow path, and we know we can find a
779 // caller-save register that is available.
780 Register free_reg = FindAvailableCallerSaveRegister(codegen);
781 __ movl(free_reg, index_reg);
782 index_reg = free_reg;
783 index = Location::RegisterLocation(index_reg);
784 } else {
785 // The initial register stored in `index_` has already been
786 // saved in the call to art::SlowPathCode::SaveLiveRegisters
787 // (as it is not a callee-save register), so we can freely
788 // use it.
789 }
790 // Shifting the index value contained in `index_reg` by the scale
791 // factor (2) cannot overflow in practice, as the runtime is
792 // unable to allocate object arrays with a size larger than
793 // 2^26 - 1 (that is, 2^28 - 4 bytes).
794 __ shll(index_reg, Immediate(TIMES_4));
795 static_assert(
796 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
797 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
798 __ AddImmediate(index_reg, Immediate(offset_));
799 } else {
800 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
801 // intrinsics, `index_` is not shifted by a scale factor of 2
802 // (as in the case of ArrayGet), as it is actually an offset
803 // to an object field within an object.
804 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
805 DCHECK(instruction_->GetLocations()->Intrinsified());
806 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
807 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
808 << instruction_->AsInvoke()->GetIntrinsic();
809 DCHECK_EQ(offset_, 0U);
810 DCHECK(index_.IsRegisterPair());
811 // UnsafeGet's offset location is a register pair, the low
812 // part contains the correct offset.
813 index = index_.ToLow();
814 }
815 }
816
817 // We're moving two or three locations to locations that could
818 // overlap, so we need a parallel move resolver.
819 InvokeRuntimeCallingConvention calling_convention;
820 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
821 parallel_move.AddMove(ref_,
822 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
823 DataType::Type::kReference,
824 nullptr);
825 parallel_move.AddMove(obj_,
826 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
827 DataType::Type::kReference,
828 nullptr);
829 if (index.IsValid()) {
830 parallel_move.AddMove(index,
831 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
832 DataType::Type::kInt32,
833 nullptr);
834 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
835 } else {
836 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
837 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
838 }
839 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
840 CheckEntrypointTypes<
841 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
842 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
843
844 RestoreLiveRegisters(codegen, locations);
845 __ jmp(GetExitLabel());
846 }
847
GetDescription() const848 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
849
850 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)851 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
852 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
853 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
854 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
855 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
856 return static_cast<Register>(i);
857 }
858 }
859 // We shall never fail to find a free caller-save register, as
860 // there are more than two core caller-save registers on x86
861 // (meaning it is possible to find one which is different from
862 // `ref` and `obj`).
863 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
864 LOG(FATAL) << "Could not find a free caller-save register";
865 UNREACHABLE();
866 }
867
// Location that receives the result of the read barrier call (copied from EAX).
const Location out_;
// Reference passed as the first argument of the kQuickReadBarrierSlow call.
const Location ref_;
// Object the reference was loaded from; passed as the second argument.
const Location obj_;
// Constant offset: passed as the third argument when `index_` is not valid,
// or added to the scaled index for HArrayGet.
const uint32_t offset_;
// An additional location containing an index to an array.
// Only used for HArrayGet and the UnsafeGetObject &
// UnsafeGetObjectVolatile intrinsics.
const Location index_;

DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
878 };
879
880 // Slow path generating a read barrier for a GC root.
881 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
882 public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)883 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
884 : SlowPathCode(instruction), out_(out), root_(root) {
885 DCHECK(kEmitCompilerReadBarrier);
886 }
887
EmitNativeCode(CodeGenerator * codegen)888 void EmitNativeCode(CodeGenerator* codegen) override {
889 LocationSummary* locations = instruction_->GetLocations();
890 Register reg_out = out_.AsRegister<Register>();
891 DCHECK(locations->CanCall());
892 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
893 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
894 << "Unexpected instruction in read barrier for GC root slow path: "
895 << instruction_->DebugName();
896
897 __ Bind(GetEntryLabel());
898 SaveLiveRegisters(codegen, locations);
899
900 InvokeRuntimeCallingConvention calling_convention;
901 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
902 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
903 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
904 instruction_,
905 instruction_->GetDexPc(),
906 this);
907 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
908 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
909
910 RestoreLiveRegisters(codegen, locations);
911 __ jmp(GetExitLabel());
912 }
913
GetDescription() const914 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
915
916 private:
917 const Location out_;
918 const Location root_;
919
920 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
921 };
922
// From this point on, `__` forwards to the X86Assembler returned by the
// unqualified GetAssembler() call, i.e. the one visible in the scope where
// the macro is expanded.
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
926
X86Condition(IfCondition cond)927 inline Condition X86Condition(IfCondition cond) {
928 switch (cond) {
929 case kCondEQ: return kEqual;
930 case kCondNE: return kNotEqual;
931 case kCondLT: return kLess;
932 case kCondLE: return kLessEqual;
933 case kCondGT: return kGreater;
934 case kCondGE: return kGreaterEqual;
935 case kCondB: return kBelow;
936 case kCondBE: return kBelowEqual;
937 case kCondA: return kAbove;
938 case kCondAE: return kAboveEqual;
939 }
940 LOG(FATAL) << "Unreachable";
941 UNREACHABLE();
942 }
943
944 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)945 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
946 switch (cond) {
947 case kCondEQ: return kEqual;
948 case kCondNE: return kNotEqual;
949 // Signed to unsigned, and FP to x86 name.
950 case kCondLT: return kBelow;
951 case kCondLE: return kBelowEqual;
952 case kCondGT: return kAbove;
953 case kCondGE: return kAboveEqual;
954 // Unsigned remain unchanged.
955 case kCondB: return kBelow;
956 case kCondBE: return kBelowEqual;
957 case kCondA: return kAbove;
958 case kCondAE: return kAboveEqual;
959 }
960 LOG(FATAL) << "Unreachable";
961 UNREACHABLE();
962 }
963
DumpCoreRegister(std::ostream & stream,int reg) const964 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
965 stream << Register(reg);
966 }
967
DumpFloatingPointRegister(std::ostream & stream,int reg) const968 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
969 stream << XmmRegister(reg);
970 }
971
GetInstructionSetFeatures() const972 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
973 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
974 }
975
SaveCoreRegister(size_t stack_index,uint32_t reg_id)976 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
977 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
978 return kX86WordSize;
979 }
980
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)981 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
982 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
983 return kX86WordSize;
984 }
985
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)986 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
987 if (GetGraph()->HasSIMD()) {
988 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
989 } else {
990 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
991 }
992 return GetSlowPathFPWidth();
993 }
994
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)995 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
996 if (GetGraph()->HasSIMD()) {
997 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
998 } else {
999 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1000 }
1001 return GetSlowPathFPWidth();
1002 }
1003
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1004 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1005 HInstruction* instruction,
1006 uint32_t dex_pc,
1007 SlowPathCode* slow_path) {
1008 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1009 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1010 if (EntrypointRequiresStackMap(entrypoint)) {
1011 RecordPcInfo(instruction, dex_pc, slow_path);
1012 }
1013 }
1014
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1015 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1016 HInstruction* instruction,
1017 SlowPathCode* slow_path) {
1018 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1019 GenerateInvokeRuntime(entry_point_offset);
1020 }
1021
GenerateInvokeRuntime(int32_t entry_point_offset)1022 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1023 __ fs()->call(Address::Absolute(entry_point_offset));
1024 }
1025
CodeGeneratorX86(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)1026 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1027 const CompilerOptions& compiler_options,
1028 OptimizingCompilerStats* stats)
1029 : CodeGenerator(graph,
1030 kNumberOfCpuRegisters,
1031 kNumberOfXmmRegisters,
1032 kNumberOfRegisterPairs,
1033 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1034 arraysize(kCoreCalleeSaves))
1035 | (1 << kFakeReturnRegister),
1036 0,
1037 compiler_options,
1038 stats),
1039 block_labels_(nullptr),
1040 location_builder_(graph, this),
1041 instruction_visitor_(graph, this),
1042 move_resolver_(graph->GetAllocator(), this),
1043 assembler_(graph->GetAllocator()),
1044 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1045 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1046 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1047 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1048 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1049 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1050 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1051 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1052 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1053 constant_area_start_(-1),
1054 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1055 method_address_offset_(std::less<uint32_t>(),
1056 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1057 // Use a fake return address register to mimic Quick.
1058 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1059 }
1060
SetupBlockedRegisters() const1061 void CodeGeneratorX86::SetupBlockedRegisters() const {
1062 // Stack register is always reserved.
1063 blocked_core_registers_[ESP] = true;
1064 }
1065
InstructionCodeGeneratorX86(HGraph * graph,CodeGeneratorX86 * codegen)1066 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1067 : InstructionCodeGenerator(graph, codegen),
1068 assembler_(codegen->GetAssembler()),
1069 codegen_(codegen) {}
1070
DWARFReg(Register reg)1071 static dwarf::Reg DWARFReg(Register reg) {
1072 return dwarf::Reg::X86Core(static_cast<int>(reg));
1073 }
1074
MaybeIncrementHotness(bool is_frame_entry)1075 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1076 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1077 Register reg = EAX;
1078 if (is_frame_entry) {
1079 reg = kMethodRegisterArgument;
1080 } else {
1081 __ pushl(EAX);
1082 __ movl(EAX, Address(ESP, kX86WordSize));
1083 }
1084 NearLabel overflow;
1085 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1086 Immediate(ArtMethod::MaxCounter()));
1087 __ j(kEqual, &overflow);
1088 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1089 Immediate(1));
1090 __ Bind(&overflow);
1091 if (!is_frame_entry) {
1092 __ popl(EAX);
1093 }
1094 }
1095
1096 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1097 ScopedObjectAccess soa(Thread::Current());
1098 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1099 if (info != nullptr) {
1100 uint32_t address = reinterpret_cast32<uint32_t>(info);
1101 NearLabel done;
1102 if (HasEmptyFrame()) {
1103 CHECK(is_frame_entry);
1104 // Alignment
1105 __ subl(ESP, Immediate(8));
1106 __ cfi().AdjustCFAOffset(8);
1107 // We need a temporary. The stub also expects the method at bottom of stack.
1108 __ pushl(EAX);
1109 __ cfi().AdjustCFAOffset(4);
1110 __ movl(EAX, Immediate(address));
1111 __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1112 Immediate(1));
1113 __ j(kCarryClear, &done);
1114 GenerateInvokeRuntime(
1115 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1116 __ Bind(&done);
1117 // We don't strictly require to restore EAX, but this makes the generated
1118 // code easier to reason about.
1119 __ popl(EAX);
1120 __ cfi().AdjustCFAOffset(-4);
1121 __ addl(ESP, Immediate(8));
1122 __ cfi().AdjustCFAOffset(-8);
1123 } else {
1124 if (!RequiresCurrentMethod()) {
1125 CHECK(is_frame_entry);
1126 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1127 }
1128 // We need a temporary.
1129 __ pushl(EAX);
1130 __ cfi().AdjustCFAOffset(4);
1131 __ movl(EAX, Immediate(address));
1132 __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1133 Immediate(1));
1134 __ popl(EAX); // Put stack as expected before exiting or calling stub.
1135 __ cfi().AdjustCFAOffset(-4);
1136 __ j(kCarryClear, &done);
1137 GenerateInvokeRuntime(
1138 GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1139 __ Bind(&done);
1140 }
1141 }
1142 }
1143 }
1144
GenerateFrameEntry()1145 void CodeGeneratorX86::GenerateFrameEntry() {
1146 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1147 __ Bind(&frame_entry_label_);
1148 bool skip_overflow_check =
1149 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1150 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1151
1152 if (!skip_overflow_check) {
1153 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1154 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1155 RecordPcInfo(nullptr, 0);
1156 }
1157
1158 if (!HasEmptyFrame()) {
1159 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1160 Register reg = kCoreCalleeSaves[i];
1161 if (allocated_registers_.ContainsCoreRegister(reg)) {
1162 __ pushl(reg);
1163 __ cfi().AdjustCFAOffset(kX86WordSize);
1164 __ cfi().RelOffset(DWARFReg(reg), 0);
1165 }
1166 }
1167
1168 int adjust = GetFrameSize() - FrameEntrySpillSize();
1169 __ subl(ESP, Immediate(adjust));
1170 __ cfi().AdjustCFAOffset(adjust);
1171 // Save the current method if we need it. Note that we do not
1172 // do this in HCurrentMethod, as the instruction might have been removed
1173 // in the SSA graph.
1174 if (RequiresCurrentMethod()) {
1175 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1176 }
1177
1178 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1179 // Initialize should_deoptimize flag to 0.
1180 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1181 }
1182 }
1183
1184 MaybeIncrementHotness(/* is_frame_entry= */ true);
1185 }
1186
GenerateFrameExit()1187 void CodeGeneratorX86::GenerateFrameExit() {
1188 __ cfi().RememberState();
1189 if (!HasEmptyFrame()) {
1190 int adjust = GetFrameSize() - FrameEntrySpillSize();
1191 __ addl(ESP, Immediate(adjust));
1192 __ cfi().AdjustCFAOffset(-adjust);
1193
1194 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1195 Register reg = kCoreCalleeSaves[i];
1196 if (allocated_registers_.ContainsCoreRegister(reg)) {
1197 __ popl(reg);
1198 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1199 __ cfi().Restore(DWARFReg(reg));
1200 }
1201 }
1202 }
1203 __ ret();
1204 __ cfi().RestoreState();
1205 __ cfi().DefCFAOffset(GetFrameSize());
1206 }
1207
Bind(HBasicBlock * block)1208 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1209 __ Bind(GetLabelOf(block));
1210 }
1211
GetReturnLocation(DataType::Type type) const1212 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1213 switch (type) {
1214 case DataType::Type::kReference:
1215 case DataType::Type::kBool:
1216 case DataType::Type::kUint8:
1217 case DataType::Type::kInt8:
1218 case DataType::Type::kUint16:
1219 case DataType::Type::kInt16:
1220 case DataType::Type::kUint32:
1221 case DataType::Type::kInt32:
1222 return Location::RegisterLocation(EAX);
1223
1224 case DataType::Type::kUint64:
1225 case DataType::Type::kInt64:
1226 return Location::RegisterPairLocation(EAX, EDX);
1227
1228 case DataType::Type::kVoid:
1229 return Location::NoLocation();
1230
1231 case DataType::Type::kFloat64:
1232 case DataType::Type::kFloat32:
1233 return Location::FpuRegisterLocation(XMM0);
1234 }
1235
1236 UNREACHABLE();
1237 }
1238
GetMethodLocation() const1239 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1240 return Location::RegisterLocation(kMethodRegisterArgument);
1241 }
1242
GetNextLocation(DataType::Type type)1243 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1244 switch (type) {
1245 case DataType::Type::kReference:
1246 case DataType::Type::kBool:
1247 case DataType::Type::kUint8:
1248 case DataType::Type::kInt8:
1249 case DataType::Type::kUint16:
1250 case DataType::Type::kInt16:
1251 case DataType::Type::kInt32: {
1252 uint32_t index = gp_index_++;
1253 stack_index_++;
1254 if (index < calling_convention.GetNumberOfRegisters()) {
1255 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1256 } else {
1257 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1258 }
1259 }
1260
1261 case DataType::Type::kInt64: {
1262 uint32_t index = gp_index_;
1263 gp_index_ += 2;
1264 stack_index_ += 2;
1265 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1266 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1267 calling_convention.GetRegisterPairAt(index));
1268 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1269 } else {
1270 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1271 }
1272 }
1273
1274 case DataType::Type::kFloat32: {
1275 uint32_t index = float_index_++;
1276 stack_index_++;
1277 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1278 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1279 } else {
1280 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1281 }
1282 }
1283
1284 case DataType::Type::kFloat64: {
1285 uint32_t index = float_index_++;
1286 stack_index_ += 2;
1287 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1288 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1289 } else {
1290 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1291 }
1292 }
1293
1294 case DataType::Type::kUint32:
1295 case DataType::Type::kUint64:
1296 case DataType::Type::kVoid:
1297 LOG(FATAL) << "Unexpected parameter type " << type;
1298 UNREACHABLE();
1299 }
1300 return Location::NoLocation();
1301 }
1302
Move32(Location destination,Location source)1303 void CodeGeneratorX86::Move32(Location destination, Location source) {
1304 if (source.Equals(destination)) {
1305 return;
1306 }
1307 if (destination.IsRegister()) {
1308 if (source.IsRegister()) {
1309 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1310 } else if (source.IsFpuRegister()) {
1311 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1312 } else {
1313 DCHECK(source.IsStackSlot());
1314 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1315 }
1316 } else if (destination.IsFpuRegister()) {
1317 if (source.IsRegister()) {
1318 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1319 } else if (source.IsFpuRegister()) {
1320 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1321 } else {
1322 DCHECK(source.IsStackSlot());
1323 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1324 }
1325 } else {
1326 DCHECK(destination.IsStackSlot()) << destination;
1327 if (source.IsRegister()) {
1328 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1329 } else if (source.IsFpuRegister()) {
1330 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1331 } else if (source.IsConstant()) {
1332 HConstant* constant = source.GetConstant();
1333 int32_t value = GetInt32ValueOf(constant);
1334 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1335 } else {
1336 DCHECK(source.IsStackSlot());
1337 __ pushl(Address(ESP, source.GetStackIndex()));
1338 __ popl(Address(ESP, destination.GetStackIndex()));
1339 }
1340 }
1341 }
1342
Move64(Location destination,Location source)1343 void CodeGeneratorX86::Move64(Location destination, Location source) {
1344 if (source.Equals(destination)) {
1345 return;
1346 }
1347 if (destination.IsRegisterPair()) {
1348 if (source.IsRegisterPair()) {
1349 EmitParallelMoves(
1350 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1351 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1352 DataType::Type::kInt32,
1353 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1354 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1355 DataType::Type::kInt32);
1356 } else if (source.IsFpuRegister()) {
1357 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1358 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1359 __ psrlq(src_reg, Immediate(32));
1360 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1361 } else {
1362 // No conflict possible, so just do the moves.
1363 DCHECK(source.IsDoubleStackSlot());
1364 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1365 __ movl(destination.AsRegisterPairHigh<Register>(),
1366 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1367 }
1368 } else if (destination.IsFpuRegister()) {
1369 if (source.IsFpuRegister()) {
1370 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1371 } else if (source.IsDoubleStackSlot()) {
1372 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1373 } else if (source.IsRegisterPair()) {
1374 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1375 // Create stack space for 2 elements.
1376 __ subl(ESP, Immediate(2 * elem_size));
1377 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
1378 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
1379 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1380 // And remove the temporary stack space we allocated.
1381 __ addl(ESP, Immediate(2 * elem_size));
1382 } else {
1383 LOG(FATAL) << "Unimplemented";
1384 }
1385 } else {
1386 DCHECK(destination.IsDoubleStackSlot()) << destination;
1387 if (source.IsRegisterPair()) {
1388 // No conflict possible, so just do the moves.
1389 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1390 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1391 source.AsRegisterPairHigh<Register>());
1392 } else if (source.IsFpuRegister()) {
1393 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1394 } else if (source.IsConstant()) {
1395 HConstant* constant = source.GetConstant();
1396 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1397 int64_t value = GetInt64ValueOf(constant);
1398 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1399 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1400 Immediate(High32Bits(value)));
1401 } else {
1402 DCHECK(source.IsDoubleStackSlot()) << source;
1403 EmitParallelMoves(
1404 Location::StackSlot(source.GetStackIndex()),
1405 Location::StackSlot(destination.GetStackIndex()),
1406 DataType::Type::kInt32,
1407 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1408 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1409 DataType::Type::kInt32);
1410 }
1411 }
1412 }
1413
MoveConstant(Location location,int32_t value)1414 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1415 DCHECK(location.IsRegister());
1416 __ movl(location.AsRegister<Register>(), Immediate(value));
1417 }
1418
MoveLocation(Location dst,Location src,DataType::Type dst_type)1419 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1420 HParallelMove move(GetGraph()->GetAllocator());
1421 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1422 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1423 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1424 } else {
1425 move.AddMove(src, dst, dst_type, nullptr);
1426 }
1427 GetMoveResolver()->EmitNativeCode(&move);
1428 }
1429
AddLocationAsTemp(Location location,LocationSummary * locations)1430 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1431 if (location.IsRegister()) {
1432 locations->AddTemp(location);
1433 } else if (location.IsRegisterPair()) {
1434 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1435 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1436 } else {
1437 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1438 }
1439 }
1440
HandleGoto(HInstruction * got,HBasicBlock * successor)1441 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1442 if (successor->IsExitBlock()) {
1443 DCHECK(got->GetPrevious()->AlwaysThrows());
1444 return; // no code needed
1445 }
1446
1447 HBasicBlock* block = got->GetBlock();
1448 HInstruction* previous = got->GetPrevious();
1449
1450 HLoopInformation* info = block->GetLoopInformation();
1451 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1452 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1453 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1454 return;
1455 }
1456
1457 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1458 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1459 }
1460 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1461 __ jmp(codegen_->GetLabelOf(successor));
1462 }
1463 }
1464
VisitGoto(HGoto * got)1465 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1466 got->SetLocations(nullptr);
1467 }
1468
VisitGoto(HGoto * got)1469 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1470 HandleGoto(got, got->GetSuccessor());
1471 }
1472
VisitTryBoundary(HTryBoundary * try_boundary)1473 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1474 try_boundary->SetLocations(nullptr);
1475 }
1476
VisitTryBoundary(HTryBoundary * try_boundary)1477 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1478 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1479 if (!successor->IsExitBlock()) {
1480 HandleGoto(try_boundary, successor);
1481 }
1482 }
1483
VisitExit(HExit * exit)1484 void LocationsBuilderX86::VisitExit(HExit* exit) {
1485 exit->SetLocations(nullptr);
1486 }
1487
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1488 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1489 }
1490
1491 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1492 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1493 LabelType* true_label,
1494 LabelType* false_label) {
1495 if (cond->IsFPConditionTrueIfNaN()) {
1496 __ j(kUnordered, true_label);
1497 } else if (cond->IsFPConditionFalseIfNaN()) {
1498 __ j(kUnordered, false_label);
1499 }
1500 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1501 }
1502
1503 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1504 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1505 LabelType* true_label,
1506 LabelType* false_label) {
1507 LocationSummary* locations = cond->GetLocations();
1508 Location left = locations->InAt(0);
1509 Location right = locations->InAt(1);
1510 IfCondition if_cond = cond->GetCondition();
1511
1512 Register left_high = left.AsRegisterPairHigh<Register>();
1513 Register left_low = left.AsRegisterPairLow<Register>();
1514 IfCondition true_high_cond = if_cond;
1515 IfCondition false_high_cond = cond->GetOppositeCondition();
1516 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1517
1518 // Set the conditions for the test, remembering that == needs to be
1519 // decided using the low words.
1520 switch (if_cond) {
1521 case kCondEQ:
1522 case kCondNE:
1523 // Nothing to do.
1524 break;
1525 case kCondLT:
1526 false_high_cond = kCondGT;
1527 break;
1528 case kCondLE:
1529 true_high_cond = kCondLT;
1530 break;
1531 case kCondGT:
1532 false_high_cond = kCondLT;
1533 break;
1534 case kCondGE:
1535 true_high_cond = kCondGT;
1536 break;
1537 case kCondB:
1538 false_high_cond = kCondA;
1539 break;
1540 case kCondBE:
1541 true_high_cond = kCondB;
1542 break;
1543 case kCondA:
1544 false_high_cond = kCondB;
1545 break;
1546 case kCondAE:
1547 true_high_cond = kCondA;
1548 break;
1549 }
1550
1551 if (right.IsConstant()) {
1552 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1553 int32_t val_high = High32Bits(value);
1554 int32_t val_low = Low32Bits(value);
1555
1556 codegen_->Compare32BitValue(left_high, val_high);
1557 if (if_cond == kCondNE) {
1558 __ j(X86Condition(true_high_cond), true_label);
1559 } else if (if_cond == kCondEQ) {
1560 __ j(X86Condition(false_high_cond), false_label);
1561 } else {
1562 __ j(X86Condition(true_high_cond), true_label);
1563 __ j(X86Condition(false_high_cond), false_label);
1564 }
1565 // Must be equal high, so compare the lows.
1566 codegen_->Compare32BitValue(left_low, val_low);
1567 } else if (right.IsRegisterPair()) {
1568 Register right_high = right.AsRegisterPairHigh<Register>();
1569 Register right_low = right.AsRegisterPairLow<Register>();
1570
1571 __ cmpl(left_high, right_high);
1572 if (if_cond == kCondNE) {
1573 __ j(X86Condition(true_high_cond), true_label);
1574 } else if (if_cond == kCondEQ) {
1575 __ j(X86Condition(false_high_cond), false_label);
1576 } else {
1577 __ j(X86Condition(true_high_cond), true_label);
1578 __ j(X86Condition(false_high_cond), false_label);
1579 }
1580 // Must be equal high, so compare the lows.
1581 __ cmpl(left_low, right_low);
1582 } else {
1583 DCHECK(right.IsDoubleStackSlot());
1584 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1585 if (if_cond == kCondNE) {
1586 __ j(X86Condition(true_high_cond), true_label);
1587 } else if (if_cond == kCondEQ) {
1588 __ j(X86Condition(false_high_cond), false_label);
1589 } else {
1590 __ j(X86Condition(true_high_cond), true_label);
1591 __ j(X86Condition(false_high_cond), false_label);
1592 }
1593 // Must be equal high, so compare the lows.
1594 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1595 }
1596 // The last comparison might be unsigned.
1597 __ j(final_condition, true_label);
1598 }
1599
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1600 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1601 Location rhs,
1602 HInstruction* insn,
1603 bool is_double) {
1604 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1605 if (is_double) {
1606 if (rhs.IsFpuRegister()) {
1607 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1608 } else if (const_area != nullptr) {
1609 DCHECK(const_area->IsEmittedAtUseSite());
1610 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1611 codegen_->LiteralDoubleAddress(
1612 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1613 const_area->GetBaseMethodAddress(),
1614 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1615 } else {
1616 DCHECK(rhs.IsDoubleStackSlot());
1617 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1618 }
1619 } else {
1620 if (rhs.IsFpuRegister()) {
1621 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1622 } else if (const_area != nullptr) {
1623 DCHECK(const_area->IsEmittedAtUseSite());
1624 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1625 codegen_->LiteralFloatAddress(
1626 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1627 const_area->GetBaseMethodAddress(),
1628 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1629 } else {
1630 DCHECK(rhs.IsStackSlot());
1631 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1632 }
1633 }
1634 }
1635
1636 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1637 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1638 LabelType* true_target_in,
1639 LabelType* false_target_in) {
1640 // Generated branching requires both targets to be explicit. If either of the
1641 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1642 LabelType fallthrough_target;
1643 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1644 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1645
1646 LocationSummary* locations = condition->GetLocations();
1647 Location left = locations->InAt(0);
1648 Location right = locations->InAt(1);
1649
1650 DataType::Type type = condition->InputAt(0)->GetType();
1651 switch (type) {
1652 case DataType::Type::kInt64:
1653 GenerateLongComparesAndJumps(condition, true_target, false_target);
1654 break;
1655 case DataType::Type::kFloat32:
1656 GenerateFPCompare(left, right, condition, false);
1657 GenerateFPJumps(condition, true_target, false_target);
1658 break;
1659 case DataType::Type::kFloat64:
1660 GenerateFPCompare(left, right, condition, true);
1661 GenerateFPJumps(condition, true_target, false_target);
1662 break;
1663 default:
1664 LOG(FATAL) << "Unexpected compare type " << type;
1665 }
1666
1667 if (false_target != &fallthrough_target) {
1668 __ jmp(false_target);
1669 }
1670
1671 if (fallthrough_target.IsLinked()) {
1672 __ Bind(&fallthrough_target);
1673 }
1674 }
1675
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1676 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1677 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1678 // are set only strictly before `branch`. We can't use the eflags on long/FP
1679 // conditions if they are materialized due to the complex branching.
1680 return cond->IsCondition() &&
1681 cond->GetNext() == branch &&
1682 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1683 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1684 }
1685
1686 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1687 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1688 size_t condition_input_index,
1689 LabelType* true_target,
1690 LabelType* false_target) {
1691 HInstruction* cond = instruction->InputAt(condition_input_index);
1692
1693 if (true_target == nullptr && false_target == nullptr) {
1694 // Nothing to do. The code always falls through.
1695 return;
1696 } else if (cond->IsIntConstant()) {
1697 // Constant condition, statically compared against "true" (integer value 1).
1698 if (cond->AsIntConstant()->IsTrue()) {
1699 if (true_target != nullptr) {
1700 __ jmp(true_target);
1701 }
1702 } else {
1703 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1704 if (false_target != nullptr) {
1705 __ jmp(false_target);
1706 }
1707 }
1708 return;
1709 }
1710
1711 // The following code generates these patterns:
1712 // (1) true_target == nullptr && false_target != nullptr
1713 // - opposite condition true => branch to false_target
1714 // (2) true_target != nullptr && false_target == nullptr
1715 // - condition true => branch to true_target
1716 // (3) true_target != nullptr && false_target != nullptr
1717 // - condition true => branch to true_target
1718 // - branch to false_target
1719 if (IsBooleanValueOrMaterializedCondition(cond)) {
1720 if (AreEflagsSetFrom(cond, instruction)) {
1721 if (true_target == nullptr) {
1722 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1723 } else {
1724 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1725 }
1726 } else {
1727 // Materialized condition, compare against 0.
1728 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1729 if (lhs.IsRegister()) {
1730 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1731 } else {
1732 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1733 }
1734 if (true_target == nullptr) {
1735 __ j(kEqual, false_target);
1736 } else {
1737 __ j(kNotEqual, true_target);
1738 }
1739 }
1740 } else {
1741 // Condition has not been materialized, use its inputs as the comparison and
1742 // its condition as the branch condition.
1743 HCondition* condition = cond->AsCondition();
1744
1745 // If this is a long or FP comparison that has been folded into
1746 // the HCondition, generate the comparison directly.
1747 DataType::Type type = condition->InputAt(0)->GetType();
1748 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1749 GenerateCompareTestAndBranch(condition, true_target, false_target);
1750 return;
1751 }
1752
1753 Location lhs = condition->GetLocations()->InAt(0);
1754 Location rhs = condition->GetLocations()->InAt(1);
1755 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1756 codegen_->GenerateIntCompare(lhs, rhs);
1757 if (true_target == nullptr) {
1758 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1759 } else {
1760 __ j(X86Condition(condition->GetCondition()), true_target);
1761 }
1762 }
1763
1764 // If neither branch falls through (case 3), the conditional branch to `true_target`
1765 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1766 if (true_target != nullptr && false_target != nullptr) {
1767 __ jmp(false_target);
1768 }
1769 }
1770
VisitIf(HIf * if_instr)1771 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1772 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1773 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1774 locations->SetInAt(0, Location::Any());
1775 }
1776 }
1777
VisitIf(HIf * if_instr)1778 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1779 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1780 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1781 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1782 nullptr : codegen_->GetLabelOf(true_successor);
1783 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1784 nullptr : codegen_->GetLabelOf(false_successor);
1785 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1786 }
1787
VisitDeoptimize(HDeoptimize * deoptimize)1788 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1789 LocationSummary* locations = new (GetGraph()->GetAllocator())
1790 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1791 InvokeRuntimeCallingConvention calling_convention;
1792 RegisterSet caller_saves = RegisterSet::Empty();
1793 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1794 locations->SetCustomSlowPathCallerSaves(caller_saves);
1795 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1796 locations->SetInAt(0, Location::Any());
1797 }
1798 }
1799
VisitDeoptimize(HDeoptimize * deoptimize)1800 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1801 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1802 GenerateTestAndBranch<Label>(deoptimize,
1803 /* condition_input_index= */ 0,
1804 slow_path->GetEntryLabel(),
1805 /* false_target= */ nullptr);
1806 }
1807
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1808 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1809 LocationSummary* locations = new (GetGraph()->GetAllocator())
1810 LocationSummary(flag, LocationSummary::kNoCall);
1811 locations->SetOut(Location::RequiresRegister());
1812 }
1813
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1814 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1815 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1816 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1817 }
1818
SelectCanUseCMOV(HSelect * select)1819 static bool SelectCanUseCMOV(HSelect* select) {
1820 // There are no conditional move instructions for XMMs.
1821 if (DataType::IsFloatingPointType(select->GetType())) {
1822 return false;
1823 }
1824
1825 // A FP condition doesn't generate the single CC that we need.
1826 // In 32 bit mode, a long condition doesn't generate a single CC either.
1827 HInstruction* condition = select->GetCondition();
1828 if (condition->IsCondition()) {
1829 DataType::Type compare_type = condition->InputAt(0)->GetType();
1830 if (compare_type == DataType::Type::kInt64 ||
1831 DataType::IsFloatingPointType(compare_type)) {
1832 return false;
1833 }
1834 }
1835
1836 // We can generate a CMOV for this Select.
1837 return true;
1838 }
1839
VisitSelect(HSelect * select)1840 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1841 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1842 if (DataType::IsFloatingPointType(select->GetType())) {
1843 locations->SetInAt(0, Location::RequiresFpuRegister());
1844 locations->SetInAt(1, Location::Any());
1845 } else {
1846 locations->SetInAt(0, Location::RequiresRegister());
1847 if (SelectCanUseCMOV(select)) {
1848 if (select->InputAt(1)->IsConstant()) {
1849 // Cmov can't handle a constant value.
1850 locations->SetInAt(1, Location::RequiresRegister());
1851 } else {
1852 locations->SetInAt(1, Location::Any());
1853 }
1854 } else {
1855 locations->SetInAt(1, Location::Any());
1856 }
1857 }
1858 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1859 locations->SetInAt(2, Location::RequiresRegister());
1860 }
1861 locations->SetOut(Location::SameAsFirstInput());
1862 }
1863
VisitSelect(HSelect * select)1864 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1865 LocationSummary* locations = select->GetLocations();
1866 DCHECK(locations->InAt(0).Equals(locations->Out()));
1867 if (SelectCanUseCMOV(select)) {
1868 // If both the condition and the source types are integer, we can generate
1869 // a CMOV to implement Select.
1870
1871 HInstruction* select_condition = select->GetCondition();
1872 Condition cond = kNotEqual;
1873
1874 // Figure out how to test the 'condition'.
1875 if (select_condition->IsCondition()) {
1876 HCondition* condition = select_condition->AsCondition();
1877 if (!condition->IsEmittedAtUseSite()) {
1878 // This was a previously materialized condition.
1879 // Can we use the existing condition code?
1880 if (AreEflagsSetFrom(condition, select)) {
1881 // Materialization was the previous instruction. Condition codes are right.
1882 cond = X86Condition(condition->GetCondition());
1883 } else {
1884 // No, we have to recreate the condition code.
1885 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1886 __ testl(cond_reg, cond_reg);
1887 }
1888 } else {
1889 // We can't handle FP or long here.
1890 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1891 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1892 LocationSummary* cond_locations = condition->GetLocations();
1893 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1894 cond = X86Condition(condition->GetCondition());
1895 }
1896 } else {
1897 // Must be a Boolean condition, which needs to be compared to 0.
1898 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1899 __ testl(cond_reg, cond_reg);
1900 }
1901
1902 // If the condition is true, overwrite the output, which already contains false.
1903 Location false_loc = locations->InAt(0);
1904 Location true_loc = locations->InAt(1);
1905 if (select->GetType() == DataType::Type::kInt64) {
1906 // 64 bit conditional move.
1907 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1908 Register false_low = false_loc.AsRegisterPairLow<Register>();
1909 if (true_loc.IsRegisterPair()) {
1910 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1911 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1912 } else {
1913 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1914 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1915 }
1916 } else {
1917 // 32 bit conditional move.
1918 Register false_reg = false_loc.AsRegister<Register>();
1919 if (true_loc.IsRegister()) {
1920 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1921 } else {
1922 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1923 }
1924 }
1925 } else {
1926 NearLabel false_target;
1927 GenerateTestAndBranch<NearLabel>(
1928 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
1929 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1930 __ Bind(&false_target);
1931 }
1932 }
1933
VisitNativeDebugInfo(HNativeDebugInfo * info)1934 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1935 new (GetGraph()->GetAllocator()) LocationSummary(info);
1936 }
1937
VisitNativeDebugInfo(HNativeDebugInfo *)1938 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1939 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1940 }
1941
GenerateNop()1942 void CodeGeneratorX86::GenerateNop() {
1943 __ nop();
1944 }
1945
HandleCondition(HCondition * cond)1946 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1947 LocationSummary* locations =
1948 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1949 // Handle the long/FP comparisons made in instruction simplification.
1950 switch (cond->InputAt(0)->GetType()) {
1951 case DataType::Type::kInt64: {
1952 locations->SetInAt(0, Location::RequiresRegister());
1953 locations->SetInAt(1, Location::Any());
1954 if (!cond->IsEmittedAtUseSite()) {
1955 locations->SetOut(Location::RequiresRegister());
1956 }
1957 break;
1958 }
1959 case DataType::Type::kFloat32:
1960 case DataType::Type::kFloat64: {
1961 locations->SetInAt(0, Location::RequiresFpuRegister());
1962 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1963 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1964 } else if (cond->InputAt(1)->IsConstant()) {
1965 locations->SetInAt(1, Location::RequiresFpuRegister());
1966 } else {
1967 locations->SetInAt(1, Location::Any());
1968 }
1969 if (!cond->IsEmittedAtUseSite()) {
1970 locations->SetOut(Location::RequiresRegister());
1971 }
1972 break;
1973 }
1974 default:
1975 locations->SetInAt(0, Location::RequiresRegister());
1976 locations->SetInAt(1, Location::Any());
1977 if (!cond->IsEmittedAtUseSite()) {
1978 // We need a byte register.
1979 locations->SetOut(Location::RegisterLocation(ECX));
1980 }
1981 break;
1982 }
1983 }
1984
HandleCondition(HCondition * cond)1985 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1986 if (cond->IsEmittedAtUseSite()) {
1987 return;
1988 }
1989
1990 LocationSummary* locations = cond->GetLocations();
1991 Location lhs = locations->InAt(0);
1992 Location rhs = locations->InAt(1);
1993 Register reg = locations->Out().AsRegister<Register>();
1994 NearLabel true_label, false_label;
1995
1996 switch (cond->InputAt(0)->GetType()) {
1997 default: {
1998 // Integer case.
1999
2000 // Clear output register: setb only sets the low byte.
2001 __ xorl(reg, reg);
2002 codegen_->GenerateIntCompare(lhs, rhs);
2003 __ setb(X86Condition(cond->GetCondition()), reg);
2004 return;
2005 }
2006 case DataType::Type::kInt64:
2007 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2008 break;
2009 case DataType::Type::kFloat32:
2010 GenerateFPCompare(lhs, rhs, cond, false);
2011 GenerateFPJumps(cond, &true_label, &false_label);
2012 break;
2013 case DataType::Type::kFloat64:
2014 GenerateFPCompare(lhs, rhs, cond, true);
2015 GenerateFPJumps(cond, &true_label, &false_label);
2016 break;
2017 }
2018
2019 // Convert the jumps into the result.
2020 NearLabel done_label;
2021
2022 // False case: result = 0.
2023 __ Bind(&false_label);
2024 __ xorl(reg, reg);
2025 __ jmp(&done_label);
2026
2027 // True case: result = 1.
2028 __ Bind(&true_label);
2029 __ movl(reg, Immediate(1));
2030 __ Bind(&done_label);
2031 }
2032
VisitEqual(HEqual * comp)2033 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2034 HandleCondition(comp);
2035 }
2036
VisitEqual(HEqual * comp)2037 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2038 HandleCondition(comp);
2039 }
2040
VisitNotEqual(HNotEqual * comp)2041 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2042 HandleCondition(comp);
2043 }
2044
VisitNotEqual(HNotEqual * comp)2045 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2046 HandleCondition(comp);
2047 }
2048
VisitLessThan(HLessThan * comp)2049 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2050 HandleCondition(comp);
2051 }
2052
VisitLessThan(HLessThan * comp)2053 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2054 HandleCondition(comp);
2055 }
2056
VisitLessThanOrEqual(HLessThanOrEqual * comp)2057 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2058 HandleCondition(comp);
2059 }
2060
VisitLessThanOrEqual(HLessThanOrEqual * comp)2061 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2062 HandleCondition(comp);
2063 }
2064
VisitGreaterThan(HGreaterThan * comp)2065 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2066 HandleCondition(comp);
2067 }
2068
VisitGreaterThan(HGreaterThan * comp)2069 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2070 HandleCondition(comp);
2071 }
2072
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2073 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2074 HandleCondition(comp);
2075 }
2076
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2077 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2078 HandleCondition(comp);
2079 }
2080
VisitBelow(HBelow * comp)2081 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2082 HandleCondition(comp);
2083 }
2084
VisitBelow(HBelow * comp)2085 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2086 HandleCondition(comp);
2087 }
2088
VisitBelowOrEqual(HBelowOrEqual * comp)2089 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2090 HandleCondition(comp);
2091 }
2092
VisitBelowOrEqual(HBelowOrEqual * comp)2093 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2094 HandleCondition(comp);
2095 }
2096
VisitAbove(HAbove * comp)2097 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2098 HandleCondition(comp);
2099 }
2100
VisitAbove(HAbove * comp)2101 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2102 HandleCondition(comp);
2103 }
2104
VisitAboveOrEqual(HAboveOrEqual * comp)2105 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2106 HandleCondition(comp);
2107 }
2108
VisitAboveOrEqual(HAboveOrEqual * comp)2109 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2110 HandleCondition(comp);
2111 }
2112
VisitIntConstant(HIntConstant * constant)2113 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2114 LocationSummary* locations =
2115 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2116 locations->SetOut(Location::ConstantLocation(constant));
2117 }
2118
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2119 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2120 // Will be generated at use site.
2121 }
2122
VisitNullConstant(HNullConstant * constant)2123 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2124 LocationSummary* locations =
2125 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2126 locations->SetOut(Location::ConstantLocation(constant));
2127 }
2128
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2129 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2130 // Will be generated at use site.
2131 }
2132
VisitLongConstant(HLongConstant * constant)2133 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2134 LocationSummary* locations =
2135 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2136 locations->SetOut(Location::ConstantLocation(constant));
2137 }
2138
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2139 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2140 // Will be generated at use site.
2141 }
2142
VisitFloatConstant(HFloatConstant * constant)2143 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2144 LocationSummary* locations =
2145 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2146 locations->SetOut(Location::ConstantLocation(constant));
2147 }
2148
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2149 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2150 // Will be generated at use site.
2151 }
2152
VisitDoubleConstant(HDoubleConstant * constant)2153 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2154 LocationSummary* locations =
2155 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2156 locations->SetOut(Location::ConstantLocation(constant));
2157 }
2158
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2159 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2160 // Will be generated at use site.
2161 }
2162
VisitConstructorFence(HConstructorFence * constructor_fence)2163 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2164 constructor_fence->SetLocations(nullptr);
2165 }
2166
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2167 void InstructionCodeGeneratorX86::VisitConstructorFence(
2168 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2169 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2170 }
2171
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2172 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2173 memory_barrier->SetLocations(nullptr);
2174 }
2175
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2176 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2177 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2178 }
2179
VisitReturnVoid(HReturnVoid * ret)2180 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2181 ret->SetLocations(nullptr);
2182 }
2183
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2184 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2185 codegen_->GenerateFrameExit();
2186 }
2187
VisitReturn(HReturn * ret)2188 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2189 LocationSummary* locations =
2190 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2191 switch (ret->InputAt(0)->GetType()) {
2192 case DataType::Type::kReference:
2193 case DataType::Type::kBool:
2194 case DataType::Type::kUint8:
2195 case DataType::Type::kInt8:
2196 case DataType::Type::kUint16:
2197 case DataType::Type::kInt16:
2198 case DataType::Type::kInt32:
2199 locations->SetInAt(0, Location::RegisterLocation(EAX));
2200 break;
2201
2202 case DataType::Type::kInt64:
2203 locations->SetInAt(
2204 0, Location::RegisterPairLocation(EAX, EDX));
2205 break;
2206
2207 case DataType::Type::kFloat32:
2208 case DataType::Type::kFloat64:
2209 locations->SetInAt(
2210 0, Location::FpuRegisterLocation(XMM0));
2211 break;
2212
2213 default:
2214 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2215 }
2216 }
2217
VisitReturn(HReturn * ret)2218 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2219 switch (ret->InputAt(0)->GetType()) {
2220 case DataType::Type::kReference:
2221 case DataType::Type::kBool:
2222 case DataType::Type::kUint8:
2223 case DataType::Type::kInt8:
2224 case DataType::Type::kUint16:
2225 case DataType::Type::kInt16:
2226 case DataType::Type::kInt32:
2227 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2228 break;
2229
2230 case DataType::Type::kInt64:
2231 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2232 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2233 break;
2234
2235 case DataType::Type::kFloat32:
2236 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2237 if (GetGraph()->IsCompilingOsr()) {
2238 // To simplify callers of an OSR method, we put the return value in both
2239 // floating point and core registers.
2240 __ movd(EAX, XMM0);
2241 }
2242 break;
2243
2244 case DataType::Type::kFloat64:
2245 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2246 if (GetGraph()->IsCompilingOsr()) {
2247 // To simplify callers of an OSR method, we put the return value in both
2248 // floating point and core registers.
2249 __ movd(EAX, XMM0);
2250 // Use XMM1 as temporary register to not clobber XMM0.
2251 __ movaps(XMM1, XMM0);
2252 __ psrlq(XMM1, Immediate(32));
2253 __ movd(EDX, XMM1);
2254 }
2255 break;
2256
2257 default:
2258 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2259 }
2260 codegen_->GenerateFrameExit();
2261 }
2262
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2263 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2264 // The trampoline uses the same calling convention as dex calling conventions,
2265 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2266 // the method_idx.
2267 HandleInvoke(invoke);
2268 }
2269
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2270 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2271 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2272 }
2273
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2274 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2275 // Explicit clinit checks triggered by static invokes must have been pruned by
2276 // art::PrepareForRegisterAllocation.
2277 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2278
2279 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2280 if (intrinsic.TryDispatch(invoke)) {
2281 if (invoke->GetLocations()->CanCall() &&
2282 invoke->HasPcRelativeMethodLoadKind() &&
2283 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2284 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2285 }
2286 return;
2287 }
2288
2289 HandleInvoke(invoke);
2290
2291 // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
2292 if (invoke->HasPcRelativeMethodLoadKind()) {
2293 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2294 }
2295 }
2296
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2297 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2298 if (invoke->GetLocations()->Intrinsified()) {
2299 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2300 intrinsic.Dispatch(invoke);
2301 return true;
2302 }
2303 return false;
2304 }
2305
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2306 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2307 // Explicit clinit checks triggered by static invokes must have been pruned by
2308 // art::PrepareForRegisterAllocation.
2309 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2310
2311 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2312 return;
2313 }
2314
2315 LocationSummary* locations = invoke->GetLocations();
2316 codegen_->GenerateStaticOrDirectCall(
2317 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2318 }
2319
VisitInvokeVirtual(HInvokeVirtual * invoke)2320 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2321 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2322 if (intrinsic.TryDispatch(invoke)) {
2323 return;
2324 }
2325
2326 HandleInvoke(invoke);
2327
2328 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2329 // Add one temporary for inline cache update.
2330 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2331 }
2332 }
2333
HandleInvoke(HInvoke * invoke)2334 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2335 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2336 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2337 }
2338
VisitInvokeVirtual(HInvokeVirtual * invoke)2339 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2340 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2341 return;
2342 }
2343
2344 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2345 DCHECK(!codegen_->IsLeafMethod());
2346 }
2347
VisitInvokeInterface(HInvokeInterface * invoke)2348 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2349 // This call to HandleInvoke allocates a temporary (core) register
2350 // which is also used to transfer the hidden argument from FP to
2351 // core register.
2352 HandleInvoke(invoke);
2353 // Add the hidden argument.
2354 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2355
2356 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2357 // Add one temporary for inline cache update.
2358 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2359 }
2360 }
2361
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)2362 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2363 DCHECK_EQ(EAX, klass);
2364 // We know the destination of an intrinsic, so no need to record inline
2365 // caches (also the intrinsic location builder doesn't request an additional
2366 // temporary).
2367 if (!instruction->GetLocations()->Intrinsified() &&
2368 GetGraph()->IsCompilingBaseline() &&
2369 !Runtime::Current()->IsAotCompiler()) {
2370 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2371 ScopedObjectAccess soa(Thread::Current());
2372 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2373 if (info != nullptr) {
2374 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2375 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2376 if (kIsDebugBuild) {
2377 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2378 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2379 }
2380 Register temp = EBP;
2381 NearLabel done;
2382 __ movl(temp, Immediate(address));
2383 // Fast path for a monomorphic cache.
2384 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2385 __ j(kEqual, &done);
2386 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2387 __ Bind(&done);
2388 }
2389 }
2390 }
2391
VisitInvokeInterface(HInvokeInterface * invoke)2392 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2393 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2394 LocationSummary* locations = invoke->GetLocations();
2395 Register temp = locations->GetTemp(0).AsRegister<Register>();
2396 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2397 Location receiver = locations->InAt(0);
2398 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2399
2400 // Set the hidden argument. This is safe to do this here, as XMM7
2401 // won't be modified thereafter, before the `call` instruction.
2402 DCHECK_EQ(XMM7, hidden_reg);
2403 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2404 __ movd(hidden_reg, temp);
2405
2406 if (receiver.IsStackSlot()) {
2407 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2408 // /* HeapReference<Class> */ temp = temp->klass_
2409 __ movl(temp, Address(temp, class_offset));
2410 } else {
2411 // /* HeapReference<Class> */ temp = receiver->klass_
2412 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2413 }
2414 codegen_->MaybeRecordImplicitNullCheck(invoke);
2415 // Instead of simply (possibly) unpoisoning `temp` here, we should
2416 // emit a read barrier for the previous class reference load.
2417 // However this is not required in practice, as this is an
2418 // intermediate/temporary reference and because the current
2419 // concurrent copying collector keeps the from-space memory
2420 // intact/accessible until the end of the marking phase (the
2421 // concurrent copying collector may not in the future).
2422 __ MaybeUnpoisonHeapReference(temp);
2423
2424 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2425
2426 // temp = temp->GetAddressOfIMT()
2427 __ movl(temp,
2428 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2429 // temp = temp->GetImtEntryAt(method_offset);
2430 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2431 invoke->GetImtIndex(), kX86PointerSize));
2432 __ movl(temp, Address(temp, method_offset));
2433 // call temp->GetEntryPoint();
2434 __ call(Address(temp,
2435 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2436
2437 DCHECK(!codegen_->IsLeafMethod());
2438 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2439 }
2440
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2441 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2442 HandleInvoke(invoke);
2443 }
2444
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2445 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2446 codegen_->GenerateInvokePolymorphicCall(invoke);
2447 }
2448
VisitInvokeCustom(HInvokeCustom * invoke)2449 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2450 HandleInvoke(invoke);
2451 }
2452
VisitInvokeCustom(HInvokeCustom * invoke)2453 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2454 codegen_->GenerateInvokeCustomCall(invoke);
2455 }
2456
VisitNeg(HNeg * neg)2457 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2458 LocationSummary* locations =
2459 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2460 switch (neg->GetResultType()) {
2461 case DataType::Type::kInt32:
2462 case DataType::Type::kInt64:
2463 locations->SetInAt(0, Location::RequiresRegister());
2464 locations->SetOut(Location::SameAsFirstInput());
2465 break;
2466
2467 case DataType::Type::kFloat32:
2468 locations->SetInAt(0, Location::RequiresFpuRegister());
2469 locations->SetOut(Location::SameAsFirstInput());
2470 locations->AddTemp(Location::RequiresRegister());
2471 locations->AddTemp(Location::RequiresFpuRegister());
2472 break;
2473
2474 case DataType::Type::kFloat64:
2475 locations->SetInAt(0, Location::RequiresFpuRegister());
2476 locations->SetOut(Location::SameAsFirstInput());
2477 locations->AddTemp(Location::RequiresFpuRegister());
2478 break;
2479
2480 default:
2481 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2482 }
2483 }
2484
VisitNeg(HNeg * neg)2485 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2486 LocationSummary* locations = neg->GetLocations();
2487 Location out = locations->Out();
2488 Location in = locations->InAt(0);
2489 switch (neg->GetResultType()) {
2490 case DataType::Type::kInt32:
2491 DCHECK(in.IsRegister());
2492 DCHECK(in.Equals(out));
2493 __ negl(out.AsRegister<Register>());
2494 break;
2495
2496 case DataType::Type::kInt64:
2497 DCHECK(in.IsRegisterPair());
2498 DCHECK(in.Equals(out));
2499 __ negl(out.AsRegisterPairLow<Register>());
2500 // Negation is similar to subtraction from zero. The least
2501 // significant byte triggers a borrow when it is different from
2502 // zero; to take it into account, add 1 to the most significant
2503 // byte if the carry flag (CF) is set to 1 after the first NEGL
2504 // operation.
2505 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2506 __ negl(out.AsRegisterPairHigh<Register>());
2507 break;
2508
2509 case DataType::Type::kFloat32: {
2510 DCHECK(in.Equals(out));
2511 Register constant = locations->GetTemp(0).AsRegister<Register>();
2512 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2513 // Implement float negation with an exclusive or with value
2514 // 0x80000000 (mask for bit 31, representing the sign of a
2515 // single-precision floating-point number).
2516 __ movl(constant, Immediate(INT32_C(0x80000000)));
2517 __ movd(mask, constant);
2518 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2519 break;
2520 }
2521
2522 case DataType::Type::kFloat64: {
2523 DCHECK(in.Equals(out));
2524 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2525 // Implement double negation with an exclusive or with value
2526 // 0x8000000000000000 (mask for bit 63, representing the sign of
2527 // a double-precision floating-point number).
2528 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2529 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2530 break;
2531 }
2532
2533 default:
2534 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2535 }
2536 }
2537
VisitX86FPNeg(HX86FPNeg * neg)2538 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2539 LocationSummary* locations =
2540 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2541 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2542 locations->SetInAt(0, Location::RequiresFpuRegister());
2543 locations->SetInAt(1, Location::RequiresRegister());
2544 locations->SetOut(Location::SameAsFirstInput());
2545 locations->AddTemp(Location::RequiresFpuRegister());
2546 }
2547
VisitX86FPNeg(HX86FPNeg * neg)2548 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2549 LocationSummary* locations = neg->GetLocations();
2550 Location out = locations->Out();
2551 DCHECK(locations->InAt(0).Equals(out));
2552
2553 Register constant_area = locations->InAt(1).AsRegister<Register>();
2554 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2555 if (neg->GetType() == DataType::Type::kFloat32) {
2556 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2557 neg->GetBaseMethodAddress(),
2558 constant_area));
2559 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2560 } else {
2561 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2562 neg->GetBaseMethodAddress(),
2563 constant_area));
2564 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2565 }
2566 }
2567
VisitTypeConversion(HTypeConversion * conversion)2568 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2569 DataType::Type result_type = conversion->GetResultType();
2570 DataType::Type input_type = conversion->GetInputType();
2571 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2572 << input_type << " -> " << result_type;
2573
2574 // The float-to-long and double-to-long type conversions rely on a
2575 // call to the runtime.
2576 LocationSummary::CallKind call_kind =
2577 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2578 && result_type == DataType::Type::kInt64)
2579 ? LocationSummary::kCallOnMainOnly
2580 : LocationSummary::kNoCall;
2581 LocationSummary* locations =
2582 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2583
2584 switch (result_type) {
2585 case DataType::Type::kUint8:
2586 case DataType::Type::kInt8:
2587 switch (input_type) {
2588 case DataType::Type::kUint8:
2589 case DataType::Type::kInt8:
2590 case DataType::Type::kUint16:
2591 case DataType::Type::kInt16:
2592 case DataType::Type::kInt32:
2593 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2594 // Make the output overlap to please the register allocator. This greatly simplifies
2595 // the validation of the linear scan implementation
2596 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2597 break;
2598 case DataType::Type::kInt64: {
2599 HInstruction* input = conversion->InputAt(0);
2600 Location input_location = input->IsConstant()
2601 ? Location::ConstantLocation(input->AsConstant())
2602 : Location::RegisterPairLocation(EAX, EDX);
2603 locations->SetInAt(0, input_location);
2604 // Make the output overlap to please the register allocator. This greatly simplifies
2605 // the validation of the linear scan implementation
2606 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2607 break;
2608 }
2609
2610 default:
2611 LOG(FATAL) << "Unexpected type conversion from " << input_type
2612 << " to " << result_type;
2613 }
2614 break;
2615
2616 case DataType::Type::kUint16:
2617 case DataType::Type::kInt16:
2618 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2619 locations->SetInAt(0, Location::Any());
2620 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2621 break;
2622
2623 case DataType::Type::kInt32:
2624 switch (input_type) {
2625 case DataType::Type::kInt64:
2626 locations->SetInAt(0, Location::Any());
2627 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2628 break;
2629
2630 case DataType::Type::kFloat32:
2631 locations->SetInAt(0, Location::RequiresFpuRegister());
2632 locations->SetOut(Location::RequiresRegister());
2633 locations->AddTemp(Location::RequiresFpuRegister());
2634 break;
2635
2636 case DataType::Type::kFloat64:
2637 locations->SetInAt(0, Location::RequiresFpuRegister());
2638 locations->SetOut(Location::RequiresRegister());
2639 locations->AddTemp(Location::RequiresFpuRegister());
2640 break;
2641
2642 default:
2643 LOG(FATAL) << "Unexpected type conversion from " << input_type
2644 << " to " << result_type;
2645 }
2646 break;
2647
2648 case DataType::Type::kInt64:
2649 switch (input_type) {
2650 case DataType::Type::kBool:
2651 case DataType::Type::kUint8:
2652 case DataType::Type::kInt8:
2653 case DataType::Type::kUint16:
2654 case DataType::Type::kInt16:
2655 case DataType::Type::kInt32:
2656 locations->SetInAt(0, Location::RegisterLocation(EAX));
2657 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2658 break;
2659
2660 case DataType::Type::kFloat32:
2661 case DataType::Type::kFloat64: {
2662 InvokeRuntimeCallingConvention calling_convention;
2663 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2664 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2665
2666 // The runtime helper puts the result in EAX, EDX.
2667 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2668 }
2669 break;
2670
2671 default:
2672 LOG(FATAL) << "Unexpected type conversion from " << input_type
2673 << " to " << result_type;
2674 }
2675 break;
2676
2677 case DataType::Type::kFloat32:
2678 switch (input_type) {
2679 case DataType::Type::kBool:
2680 case DataType::Type::kUint8:
2681 case DataType::Type::kInt8:
2682 case DataType::Type::kUint16:
2683 case DataType::Type::kInt16:
2684 case DataType::Type::kInt32:
2685 locations->SetInAt(0, Location::RequiresRegister());
2686 locations->SetOut(Location::RequiresFpuRegister());
2687 break;
2688
2689 case DataType::Type::kInt64:
2690 locations->SetInAt(0, Location::Any());
2691 locations->SetOut(Location::Any());
2692 break;
2693
2694 case DataType::Type::kFloat64:
2695 locations->SetInAt(0, Location::RequiresFpuRegister());
2696 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2697 break;
2698
2699 default:
2700 LOG(FATAL) << "Unexpected type conversion from " << input_type
2701 << " to " << result_type;
2702 }
2703 break;
2704
2705 case DataType::Type::kFloat64:
2706 switch (input_type) {
2707 case DataType::Type::kBool:
2708 case DataType::Type::kUint8:
2709 case DataType::Type::kInt8:
2710 case DataType::Type::kUint16:
2711 case DataType::Type::kInt16:
2712 case DataType::Type::kInt32:
2713 locations->SetInAt(0, Location::RequiresRegister());
2714 locations->SetOut(Location::RequiresFpuRegister());
2715 break;
2716
2717 case DataType::Type::kInt64:
2718 locations->SetInAt(0, Location::Any());
2719 locations->SetOut(Location::Any());
2720 break;
2721
2722 case DataType::Type::kFloat32:
2723 locations->SetInAt(0, Location::RequiresFpuRegister());
2724 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2725 break;
2726
2727 default:
2728 LOG(FATAL) << "Unexpected type conversion from " << input_type
2729 << " to " << result_type;
2730 }
2731 break;
2732
2733 default:
2734 LOG(FATAL) << "Unexpected type conversion from " << input_type
2735 << " to " << result_type;
2736 }
2737 }
2738
VisitTypeConversion(HTypeConversion * conversion)2739 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2740 LocationSummary* locations = conversion->GetLocations();
2741 Location out = locations->Out();
2742 Location in = locations->InAt(0);
2743 DataType::Type result_type = conversion->GetResultType();
2744 DataType::Type input_type = conversion->GetInputType();
2745 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2746 << input_type << " -> " << result_type;
2747 switch (result_type) {
2748 case DataType::Type::kUint8:
2749 switch (input_type) {
2750 case DataType::Type::kInt8:
2751 case DataType::Type::kUint16:
2752 case DataType::Type::kInt16:
2753 case DataType::Type::kInt32:
2754 if (in.IsRegister()) {
2755 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2756 } else {
2757 DCHECK(in.GetConstant()->IsIntConstant());
2758 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2759 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2760 }
2761 break;
2762 case DataType::Type::kInt64:
2763 if (in.IsRegisterPair()) {
2764 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2765 } else {
2766 DCHECK(in.GetConstant()->IsLongConstant());
2767 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2768 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2769 }
2770 break;
2771
2772 default:
2773 LOG(FATAL) << "Unexpected type conversion from " << input_type
2774 << " to " << result_type;
2775 }
2776 break;
2777
2778 case DataType::Type::kInt8:
2779 switch (input_type) {
2780 case DataType::Type::kUint8:
2781 case DataType::Type::kUint16:
2782 case DataType::Type::kInt16:
2783 case DataType::Type::kInt32:
2784 if (in.IsRegister()) {
2785 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2786 } else {
2787 DCHECK(in.GetConstant()->IsIntConstant());
2788 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2789 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2790 }
2791 break;
2792 case DataType::Type::kInt64:
2793 if (in.IsRegisterPair()) {
2794 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2795 } else {
2796 DCHECK(in.GetConstant()->IsLongConstant());
2797 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2798 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2799 }
2800 break;
2801
2802 default:
2803 LOG(FATAL) << "Unexpected type conversion from " << input_type
2804 << " to " << result_type;
2805 }
2806 break;
2807
2808 case DataType::Type::kUint16:
2809 switch (input_type) {
2810 case DataType::Type::kInt8:
2811 case DataType::Type::kInt16:
2812 case DataType::Type::kInt32:
2813 if (in.IsRegister()) {
2814 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2815 } else if (in.IsStackSlot()) {
2816 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2817 } else {
2818 DCHECK(in.GetConstant()->IsIntConstant());
2819 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2820 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2821 }
2822 break;
2823 case DataType::Type::kInt64:
2824 if (in.IsRegisterPair()) {
2825 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2826 } else if (in.IsDoubleStackSlot()) {
2827 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2828 } else {
2829 DCHECK(in.GetConstant()->IsLongConstant());
2830 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2831 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2832 }
2833 break;
2834
2835 default:
2836 LOG(FATAL) << "Unexpected type conversion from " << input_type
2837 << " to " << result_type;
2838 }
2839 break;
2840
2841 case DataType::Type::kInt16:
2842 switch (input_type) {
2843 case DataType::Type::kUint16:
2844 case DataType::Type::kInt32:
2845 if (in.IsRegister()) {
2846 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2847 } else if (in.IsStackSlot()) {
2848 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2849 } else {
2850 DCHECK(in.GetConstant()->IsIntConstant());
2851 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2852 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2853 }
2854 break;
2855 case DataType::Type::kInt64:
2856 if (in.IsRegisterPair()) {
2857 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2858 } else if (in.IsDoubleStackSlot()) {
2859 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2860 } else {
2861 DCHECK(in.GetConstant()->IsLongConstant());
2862 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2863 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2864 }
2865 break;
2866
2867 default:
2868 LOG(FATAL) << "Unexpected type conversion from " << input_type
2869 << " to " << result_type;
2870 }
2871 break;
2872
2873 case DataType::Type::kInt32:
2874 switch (input_type) {
2875 case DataType::Type::kInt64:
2876 if (in.IsRegisterPair()) {
2877 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2878 } else if (in.IsDoubleStackSlot()) {
2879 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2880 } else {
2881 DCHECK(in.IsConstant());
2882 DCHECK(in.GetConstant()->IsLongConstant());
2883 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2884 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2885 }
2886 break;
2887
2888 case DataType::Type::kFloat32: {
2889 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2890 Register output = out.AsRegister<Register>();
2891 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2892 NearLabel done, nan;
2893
2894 __ movl(output, Immediate(kPrimIntMax));
2895 // temp = int-to-float(output)
2896 __ cvtsi2ss(temp, output);
2897 // if input >= temp goto done
2898 __ comiss(input, temp);
2899 __ j(kAboveEqual, &done);
2900 // if input == NaN goto nan
2901 __ j(kUnordered, &nan);
2902 // output = float-to-int-truncate(input)
2903 __ cvttss2si(output, input);
2904 __ jmp(&done);
2905 __ Bind(&nan);
2906 // output = 0
2907 __ xorl(output, output);
2908 __ Bind(&done);
2909 break;
2910 }
2911
2912 case DataType::Type::kFloat64: {
2913 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2914 Register output = out.AsRegister<Register>();
2915 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2916 NearLabel done, nan;
2917
2918 __ movl(output, Immediate(kPrimIntMax));
2919 // temp = int-to-double(output)
2920 __ cvtsi2sd(temp, output);
2921 // if input >= temp goto done
2922 __ comisd(input, temp);
2923 __ j(kAboveEqual, &done);
2924 // if input == NaN goto nan
2925 __ j(kUnordered, &nan);
2926 // output = double-to-int-truncate(input)
2927 __ cvttsd2si(output, input);
2928 __ jmp(&done);
2929 __ Bind(&nan);
2930 // output = 0
2931 __ xorl(output, output);
2932 __ Bind(&done);
2933 break;
2934 }
2935
2936 default:
2937 LOG(FATAL) << "Unexpected type conversion from " << input_type
2938 << " to " << result_type;
2939 }
2940 break;
2941
2942 case DataType::Type::kInt64:
2943 switch (input_type) {
2944 case DataType::Type::kBool:
2945 case DataType::Type::kUint8:
2946 case DataType::Type::kInt8:
2947 case DataType::Type::kUint16:
2948 case DataType::Type::kInt16:
2949 case DataType::Type::kInt32:
2950 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2951 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2952 DCHECK_EQ(in.AsRegister<Register>(), EAX);
2953 __ cdq();
2954 break;
2955
2956 case DataType::Type::kFloat32:
2957 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2958 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2959 break;
2960
2961 case DataType::Type::kFloat64:
2962 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2963 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2964 break;
2965
2966 default:
2967 LOG(FATAL) << "Unexpected type conversion from " << input_type
2968 << " to " << result_type;
2969 }
2970 break;
2971
2972 case DataType::Type::kFloat32:
2973 switch (input_type) {
2974 case DataType::Type::kBool:
2975 case DataType::Type::kUint8:
2976 case DataType::Type::kInt8:
2977 case DataType::Type::kUint16:
2978 case DataType::Type::kInt16:
2979 case DataType::Type::kInt32:
2980 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2981 break;
2982
2983 case DataType::Type::kInt64: {
2984 size_t adjustment = 0;
2985
2986 // Create stack space for the call to
2987 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2988 // TODO: enhance register allocator to ask for stack temporaries.
2989 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2990 adjustment = DataType::Size(DataType::Type::kInt64);
2991 __ subl(ESP, Immediate(adjustment));
2992 }
2993
2994 // Load the value to the FP stack, using temporaries if needed.
2995 PushOntoFPStack(in, 0, adjustment, false, true);
2996
2997 if (out.IsStackSlot()) {
2998 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2999 } else {
3000 __ fstps(Address(ESP, 0));
3001 Location stack_temp = Location::StackSlot(0);
3002 codegen_->Move32(out, stack_temp);
3003 }
3004
3005 // Remove the temporary stack space we allocated.
3006 if (adjustment != 0) {
3007 __ addl(ESP, Immediate(adjustment));
3008 }
3009 break;
3010 }
3011
3012 case DataType::Type::kFloat64:
3013 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3014 break;
3015
3016 default:
3017 LOG(FATAL) << "Unexpected type conversion from " << input_type
3018 << " to " << result_type;
3019 }
3020 break;
3021
3022 case DataType::Type::kFloat64:
3023 switch (input_type) {
3024 case DataType::Type::kBool:
3025 case DataType::Type::kUint8:
3026 case DataType::Type::kInt8:
3027 case DataType::Type::kUint16:
3028 case DataType::Type::kInt16:
3029 case DataType::Type::kInt32:
3030 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3031 break;
3032
3033 case DataType::Type::kInt64: {
3034 size_t adjustment = 0;
3035
3036 // Create stack space for the call to
3037 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3038 // TODO: enhance register allocator to ask for stack temporaries.
3039 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3040 adjustment = DataType::Size(DataType::Type::kInt64);
3041 __ subl(ESP, Immediate(adjustment));
3042 }
3043
3044 // Load the value to the FP stack, using temporaries if needed.
3045 PushOntoFPStack(in, 0, adjustment, false, true);
3046
3047 if (out.IsDoubleStackSlot()) {
3048 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3049 } else {
3050 __ fstpl(Address(ESP, 0));
3051 Location stack_temp = Location::DoubleStackSlot(0);
3052 codegen_->Move64(out, stack_temp);
3053 }
3054
3055 // Remove the temporary stack space we allocated.
3056 if (adjustment != 0) {
3057 __ addl(ESP, Immediate(adjustment));
3058 }
3059 break;
3060 }
3061
3062 case DataType::Type::kFloat32:
3063 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3064 break;
3065
3066 default:
3067 LOG(FATAL) << "Unexpected type conversion from " << input_type
3068 << " to " << result_type;
3069 }
3070 break;
3071
3072 default:
3073 LOG(FATAL) << "Unexpected type conversion from " << input_type
3074 << " to " << result_type;
3075 }
3076 }
3077
VisitAdd(HAdd * add)3078 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3079 LocationSummary* locations =
3080 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3081 switch (add->GetResultType()) {
3082 case DataType::Type::kInt32: {
3083 locations->SetInAt(0, Location::RequiresRegister());
3084 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3085 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3086 break;
3087 }
3088
3089 case DataType::Type::kInt64: {
3090 locations->SetInAt(0, Location::RequiresRegister());
3091 locations->SetInAt(1, Location::Any());
3092 locations->SetOut(Location::SameAsFirstInput());
3093 break;
3094 }
3095
3096 case DataType::Type::kFloat32:
3097 case DataType::Type::kFloat64: {
3098 locations->SetInAt(0, Location::RequiresFpuRegister());
3099 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3100 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3101 } else if (add->InputAt(1)->IsConstant()) {
3102 locations->SetInAt(1, Location::RequiresFpuRegister());
3103 } else {
3104 locations->SetInAt(1, Location::Any());
3105 }
3106 locations->SetOut(Location::SameAsFirstInput());
3107 break;
3108 }
3109
3110 default:
3111 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3112 UNREACHABLE();
3113 }
3114 }
3115
VisitAdd(HAdd * add)3116 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3117 LocationSummary* locations = add->GetLocations();
3118 Location first = locations->InAt(0);
3119 Location second = locations->InAt(1);
3120 Location out = locations->Out();
3121
3122 switch (add->GetResultType()) {
3123 case DataType::Type::kInt32: {
3124 if (second.IsRegister()) {
3125 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3126 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3127 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3128 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3129 } else {
3130 __ leal(out.AsRegister<Register>(), Address(
3131 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3132 }
3133 } else if (second.IsConstant()) {
3134 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3135 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3136 __ addl(out.AsRegister<Register>(), Immediate(value));
3137 } else {
3138 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3139 }
3140 } else {
3141 DCHECK(first.Equals(locations->Out()));
3142 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3143 }
3144 break;
3145 }
3146
3147 case DataType::Type::kInt64: {
3148 if (second.IsRegisterPair()) {
3149 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3150 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3151 } else if (second.IsDoubleStackSlot()) {
3152 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3153 __ adcl(first.AsRegisterPairHigh<Register>(),
3154 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3155 } else {
3156 DCHECK(second.IsConstant()) << second;
3157 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3158 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3159 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3160 }
3161 break;
3162 }
3163
3164 case DataType::Type::kFloat32: {
3165 if (second.IsFpuRegister()) {
3166 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3167 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3168 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3169 DCHECK(const_area->IsEmittedAtUseSite());
3170 __ addss(first.AsFpuRegister<XmmRegister>(),
3171 codegen_->LiteralFloatAddress(
3172 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3173 const_area->GetBaseMethodAddress(),
3174 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3175 } else {
3176 DCHECK(second.IsStackSlot());
3177 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3178 }
3179 break;
3180 }
3181
3182 case DataType::Type::kFloat64: {
3183 if (second.IsFpuRegister()) {
3184 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3185 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3186 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3187 DCHECK(const_area->IsEmittedAtUseSite());
3188 __ addsd(first.AsFpuRegister<XmmRegister>(),
3189 codegen_->LiteralDoubleAddress(
3190 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3191 const_area->GetBaseMethodAddress(),
3192 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3193 } else {
3194 DCHECK(second.IsDoubleStackSlot());
3195 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3196 }
3197 break;
3198 }
3199
3200 default:
3201 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3202 }
3203 }
3204
VisitSub(HSub * sub)3205 void LocationsBuilderX86::VisitSub(HSub* sub) {
3206 LocationSummary* locations =
3207 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3208 switch (sub->GetResultType()) {
3209 case DataType::Type::kInt32:
3210 case DataType::Type::kInt64: {
3211 locations->SetInAt(0, Location::RequiresRegister());
3212 locations->SetInAt(1, Location::Any());
3213 locations->SetOut(Location::SameAsFirstInput());
3214 break;
3215 }
3216 case DataType::Type::kFloat32:
3217 case DataType::Type::kFloat64: {
3218 locations->SetInAt(0, Location::RequiresFpuRegister());
3219 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3220 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3221 } else if (sub->InputAt(1)->IsConstant()) {
3222 locations->SetInAt(1, Location::RequiresFpuRegister());
3223 } else {
3224 locations->SetInAt(1, Location::Any());
3225 }
3226 locations->SetOut(Location::SameAsFirstInput());
3227 break;
3228 }
3229
3230 default:
3231 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3232 }
3233 }
3234
VisitSub(HSub * sub)3235 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3236 LocationSummary* locations = sub->GetLocations();
3237 Location first = locations->InAt(0);
3238 Location second = locations->InAt(1);
3239 DCHECK(first.Equals(locations->Out()));
3240 switch (sub->GetResultType()) {
3241 case DataType::Type::kInt32: {
3242 if (second.IsRegister()) {
3243 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3244 } else if (second.IsConstant()) {
3245 __ subl(first.AsRegister<Register>(),
3246 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3247 } else {
3248 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3249 }
3250 break;
3251 }
3252
3253 case DataType::Type::kInt64: {
3254 if (second.IsRegisterPair()) {
3255 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3256 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3257 } else if (second.IsDoubleStackSlot()) {
3258 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3259 __ sbbl(first.AsRegisterPairHigh<Register>(),
3260 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3261 } else {
3262 DCHECK(second.IsConstant()) << second;
3263 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3264 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3265 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3266 }
3267 break;
3268 }
3269
3270 case DataType::Type::kFloat32: {
3271 if (second.IsFpuRegister()) {
3272 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3273 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3274 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3275 DCHECK(const_area->IsEmittedAtUseSite());
3276 __ subss(first.AsFpuRegister<XmmRegister>(),
3277 codegen_->LiteralFloatAddress(
3278 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3279 const_area->GetBaseMethodAddress(),
3280 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3281 } else {
3282 DCHECK(second.IsStackSlot());
3283 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3284 }
3285 break;
3286 }
3287
3288 case DataType::Type::kFloat64: {
3289 if (second.IsFpuRegister()) {
3290 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3291 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3292 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3293 DCHECK(const_area->IsEmittedAtUseSite());
3294 __ subsd(first.AsFpuRegister<XmmRegister>(),
3295 codegen_->LiteralDoubleAddress(
3296 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3297 const_area->GetBaseMethodAddress(),
3298 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3299 } else {
3300 DCHECK(second.IsDoubleStackSlot());
3301 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3302 }
3303 break;
3304 }
3305
3306 default:
3307 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3308 }
3309 }
3310
VisitMul(HMul * mul)3311 void LocationsBuilderX86::VisitMul(HMul* mul) {
3312 LocationSummary* locations =
3313 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3314 switch (mul->GetResultType()) {
3315 case DataType::Type::kInt32:
3316 locations->SetInAt(0, Location::RequiresRegister());
3317 locations->SetInAt(1, Location::Any());
3318 if (mul->InputAt(1)->IsIntConstant()) {
3319 // Can use 3 operand multiply.
3320 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3321 } else {
3322 locations->SetOut(Location::SameAsFirstInput());
3323 }
3324 break;
3325 case DataType::Type::kInt64: {
3326 locations->SetInAt(0, Location::RequiresRegister());
3327 locations->SetInAt(1, Location::Any());
3328 locations->SetOut(Location::SameAsFirstInput());
3329 // Needed for imul on 32bits with 64bits output.
3330 locations->AddTemp(Location::RegisterLocation(EAX));
3331 locations->AddTemp(Location::RegisterLocation(EDX));
3332 break;
3333 }
3334 case DataType::Type::kFloat32:
3335 case DataType::Type::kFloat64: {
3336 locations->SetInAt(0, Location::RequiresFpuRegister());
3337 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3338 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3339 } else if (mul->InputAt(1)->IsConstant()) {
3340 locations->SetInAt(1, Location::RequiresFpuRegister());
3341 } else {
3342 locations->SetInAt(1, Location::Any());
3343 }
3344 locations->SetOut(Location::SameAsFirstInput());
3345 break;
3346 }
3347
3348 default:
3349 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3350 }
3351 }
3352
VisitMul(HMul * mul)3353 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3354 LocationSummary* locations = mul->GetLocations();
3355 Location first = locations->InAt(0);
3356 Location second = locations->InAt(1);
3357 Location out = locations->Out();
3358
3359 switch (mul->GetResultType()) {
3360 case DataType::Type::kInt32:
3361 // The constant may have ended up in a register, so test explicitly to avoid
3362 // problems where the output may not be the same as the first operand.
3363 if (mul->InputAt(1)->IsIntConstant()) {
3364 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3365 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3366 } else if (second.IsRegister()) {
3367 DCHECK(first.Equals(out));
3368 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3369 } else {
3370 DCHECK(second.IsStackSlot());
3371 DCHECK(first.Equals(out));
3372 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3373 }
3374 break;
3375
3376 case DataType::Type::kInt64: {
3377 Register in1_hi = first.AsRegisterPairHigh<Register>();
3378 Register in1_lo = first.AsRegisterPairLow<Register>();
3379 Register eax = locations->GetTemp(0).AsRegister<Register>();
3380 Register edx = locations->GetTemp(1).AsRegister<Register>();
3381
3382 DCHECK_EQ(EAX, eax);
3383 DCHECK_EQ(EDX, edx);
3384
3385 // input: in1 - 64 bits, in2 - 64 bits.
3386 // output: in1
3387 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3388 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3389 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3390 if (second.IsConstant()) {
3391 DCHECK(second.GetConstant()->IsLongConstant());
3392
3393 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3394 int32_t low_value = Low32Bits(value);
3395 int32_t high_value = High32Bits(value);
3396 Immediate low(low_value);
3397 Immediate high(high_value);
3398
3399 __ movl(eax, high);
3400 // eax <- in1.lo * in2.hi
3401 __ imull(eax, in1_lo);
3402 // in1.hi <- in1.hi * in2.lo
3403 __ imull(in1_hi, low);
3404 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3405 __ addl(in1_hi, eax);
3406 // move in2_lo to eax to prepare for double precision
3407 __ movl(eax, low);
3408 // edx:eax <- in1.lo * in2.lo
3409 __ mull(in1_lo);
3410 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3411 __ addl(in1_hi, edx);
3412 // in1.lo <- (in1.lo * in2.lo)[31:0];
3413 __ movl(in1_lo, eax);
3414 } else if (second.IsRegisterPair()) {
3415 Register in2_hi = second.AsRegisterPairHigh<Register>();
3416 Register in2_lo = second.AsRegisterPairLow<Register>();
3417
3418 __ movl(eax, in2_hi);
3419 // eax <- in1.lo * in2.hi
3420 __ imull(eax, in1_lo);
3421 // in1.hi <- in1.hi * in2.lo
3422 __ imull(in1_hi, in2_lo);
3423 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3424 __ addl(in1_hi, eax);
3425 // move in1_lo to eax to prepare for double precision
3426 __ movl(eax, in1_lo);
3427 // edx:eax <- in1.lo * in2.lo
3428 __ mull(in2_lo);
3429 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3430 __ addl(in1_hi, edx);
3431 // in1.lo <- (in1.lo * in2.lo)[31:0];
3432 __ movl(in1_lo, eax);
3433 } else {
3434 DCHECK(second.IsDoubleStackSlot()) << second;
3435 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3436 Address in2_lo(ESP, second.GetStackIndex());
3437
3438 __ movl(eax, in2_hi);
3439 // eax <- in1.lo * in2.hi
3440 __ imull(eax, in1_lo);
3441 // in1.hi <- in1.hi * in2.lo
3442 __ imull(in1_hi, in2_lo);
3443 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3444 __ addl(in1_hi, eax);
3445 // move in1_lo to eax to prepare for double precision
3446 __ movl(eax, in1_lo);
3447 // edx:eax <- in1.lo * in2.lo
3448 __ mull(in2_lo);
3449 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3450 __ addl(in1_hi, edx);
3451 // in1.lo <- (in1.lo * in2.lo)[31:0];
3452 __ movl(in1_lo, eax);
3453 }
3454
3455 break;
3456 }
3457
3458 case DataType::Type::kFloat32: {
3459 DCHECK(first.Equals(locations->Out()));
3460 if (second.IsFpuRegister()) {
3461 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3462 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3463 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3464 DCHECK(const_area->IsEmittedAtUseSite());
3465 __ mulss(first.AsFpuRegister<XmmRegister>(),
3466 codegen_->LiteralFloatAddress(
3467 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3468 const_area->GetBaseMethodAddress(),
3469 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3470 } else {
3471 DCHECK(second.IsStackSlot());
3472 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3473 }
3474 break;
3475 }
3476
3477 case DataType::Type::kFloat64: {
3478 DCHECK(first.Equals(locations->Out()));
3479 if (second.IsFpuRegister()) {
3480 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3481 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3482 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3483 DCHECK(const_area->IsEmittedAtUseSite());
3484 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3485 codegen_->LiteralDoubleAddress(
3486 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3487 const_area->GetBaseMethodAddress(),
3488 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3489 } else {
3490 DCHECK(second.IsDoubleStackSlot());
3491 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3492 }
3493 break;
3494 }
3495
3496 default:
3497 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3498 }
3499 }
3500
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3501 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3502 uint32_t temp_offset,
3503 uint32_t stack_adjustment,
3504 bool is_fp,
3505 bool is_wide) {
3506 if (source.IsStackSlot()) {
3507 DCHECK(!is_wide);
3508 if (is_fp) {
3509 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3510 } else {
3511 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3512 }
3513 } else if (source.IsDoubleStackSlot()) {
3514 DCHECK(is_wide);
3515 if (is_fp) {
3516 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3517 } else {
3518 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3519 }
3520 } else {
3521 // Write the value to the temporary location on the stack and load to FP stack.
3522 if (!is_wide) {
3523 Location stack_temp = Location::StackSlot(temp_offset);
3524 codegen_->Move32(stack_temp, source);
3525 if (is_fp) {
3526 __ flds(Address(ESP, temp_offset));
3527 } else {
3528 __ filds(Address(ESP, temp_offset));
3529 }
3530 } else {
3531 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3532 codegen_->Move64(stack_temp, source);
3533 if (is_fp) {
3534 __ fldl(Address(ESP, temp_offset));
3535 } else {
3536 __ fildl(Address(ESP, temp_offset));
3537 }
3538 }
3539 }
3540 }
3541
GenerateRemFP(HRem * rem)3542 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3543 DataType::Type type = rem->GetResultType();
3544 bool is_float = type == DataType::Type::kFloat32;
3545 size_t elem_size = DataType::Size(type);
3546 LocationSummary* locations = rem->GetLocations();
3547 Location first = locations->InAt(0);
3548 Location second = locations->InAt(1);
3549 Location out = locations->Out();
3550
3551 // Create stack space for 2 elements.
3552 // TODO: enhance register allocator to ask for stack temporaries.
3553 __ subl(ESP, Immediate(2 * elem_size));
3554
3555 // Load the values to the FP stack in reverse order, using temporaries if needed.
3556 const bool is_wide = !is_float;
3557 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3558 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3559
3560 // Loop doing FPREM until we stabilize.
3561 NearLabel retry;
3562 __ Bind(&retry);
3563 __ fprem();
3564
3565 // Move FP status to AX.
3566 __ fstsw();
3567
3568 // And see if the argument reduction is complete. This is signaled by the
3569 // C2 FPU flag bit set to 0.
3570 __ andl(EAX, Immediate(kC2ConditionMask));
3571 __ j(kNotEqual, &retry);
3572
3573 // We have settled on the final value. Retrieve it into an XMM register.
3574 // Store FP top of stack to real stack.
3575 if (is_float) {
3576 __ fsts(Address(ESP, 0));
3577 } else {
3578 __ fstl(Address(ESP, 0));
3579 }
3580
3581 // Pop the 2 items from the FP stack.
3582 __ fucompp();
3583
3584 // Load the value from the stack into an XMM register.
3585 DCHECK(out.IsFpuRegister()) << out;
3586 if (is_float) {
3587 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3588 } else {
3589 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3590 }
3591
3592 // And remove the temporary stack space we allocated.
3593 __ addl(ESP, Immediate(2 * elem_size));
3594 }
3595
3596
DivRemOneOrMinusOne(HBinaryOperation * instruction)3597 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3598 DCHECK(instruction->IsDiv() || instruction->IsRem());
3599
3600 LocationSummary* locations = instruction->GetLocations();
3601 DCHECK(locations->InAt(1).IsConstant());
3602 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3603
3604 Register out_register = locations->Out().AsRegister<Register>();
3605 Register input_register = locations->InAt(0).AsRegister<Register>();
3606 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3607
3608 DCHECK(imm == 1 || imm == -1);
3609
3610 if (instruction->IsRem()) {
3611 __ xorl(out_register, out_register);
3612 } else {
3613 __ movl(out_register, input_register);
3614 if (imm == -1) {
3615 __ negl(out_register);
3616 }
3617 }
3618 }
3619
RemByPowerOfTwo(HRem * instruction)3620 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3621 LocationSummary* locations = instruction->GetLocations();
3622 Location second = locations->InAt(1);
3623
3624 Register out = locations->Out().AsRegister<Register>();
3625 Register numerator = locations->InAt(0).AsRegister<Register>();
3626
3627 int32_t imm = Int64FromConstant(second.GetConstant());
3628 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3629 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3630
3631 Register tmp = locations->GetTemp(0).AsRegister<Register>();
3632 NearLabel done;
3633 __ movl(out, numerator);
3634 __ andl(out, Immediate(abs_imm-1));
3635 __ j(Condition::kZero, &done);
3636 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3637 __ testl(numerator, numerator);
3638 __ cmovl(Condition::kLess, out, tmp);
3639 __ Bind(&done);
3640 }
3641
DivByPowerOfTwo(HDiv * instruction)3642 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3643 LocationSummary* locations = instruction->GetLocations();
3644
3645 Register out_register = locations->Out().AsRegister<Register>();
3646 Register input_register = locations->InAt(0).AsRegister<Register>();
3647 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3648 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3649 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3650
3651 Register num = locations->GetTemp(0).AsRegister<Register>();
3652
3653 __ leal(num, Address(input_register, abs_imm - 1));
3654 __ testl(input_register, input_register);
3655 __ cmovl(kGreaterEqual, num, input_register);
3656 int shift = CTZ(imm);
3657 __ sarl(num, Immediate(shift));
3658
3659 if (imm < 0) {
3660 __ negl(num);
3661 }
3662
3663 __ movl(out_register, num);
3664 }
3665
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3666 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3667 DCHECK(instruction->IsDiv() || instruction->IsRem());
3668
3669 LocationSummary* locations = instruction->GetLocations();
3670 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3671
3672 Register eax = locations->InAt(0).AsRegister<Register>();
3673 Register out = locations->Out().AsRegister<Register>();
3674 Register num;
3675 Register edx;
3676
3677 if (instruction->IsDiv()) {
3678 edx = locations->GetTemp(0).AsRegister<Register>();
3679 num = locations->GetTemp(1).AsRegister<Register>();
3680 } else {
3681 edx = locations->Out().AsRegister<Register>();
3682 num = locations->GetTemp(0).AsRegister<Register>();
3683 }
3684
3685 DCHECK_EQ(EAX, eax);
3686 DCHECK_EQ(EDX, edx);
3687 if (instruction->IsDiv()) {
3688 DCHECK_EQ(EAX, out);
3689 } else {
3690 DCHECK_EQ(EDX, out);
3691 }
3692
3693 int64_t magic;
3694 int shift;
3695 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3696
3697 // Save the numerator.
3698 __ movl(num, eax);
3699
3700 // EAX = magic
3701 __ movl(eax, Immediate(magic));
3702
3703 // EDX:EAX = magic * numerator
3704 __ imull(num);
3705
3706 if (imm > 0 && magic < 0) {
3707 // EDX += num
3708 __ addl(edx, num);
3709 } else if (imm < 0 && magic > 0) {
3710 __ subl(edx, num);
3711 }
3712
3713 // Shift if needed.
3714 if (shift != 0) {
3715 __ sarl(edx, Immediate(shift));
3716 }
3717
3718 // EDX += 1 if EDX < 0
3719 __ movl(eax, edx);
3720 __ shrl(edx, Immediate(31));
3721 __ addl(edx, eax);
3722
3723 if (instruction->IsRem()) {
3724 __ movl(eax, num);
3725 __ imull(edx, Immediate(imm));
3726 __ subl(eax, edx);
3727 __ movl(edx, eax);
3728 } else {
3729 __ movl(eax, edx);
3730 }
3731 }
3732
GenerateDivRemIntegral(HBinaryOperation * instruction)3733 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3734 DCHECK(instruction->IsDiv() || instruction->IsRem());
3735
3736 LocationSummary* locations = instruction->GetLocations();
3737 Location out = locations->Out();
3738 Location first = locations->InAt(0);
3739 Location second = locations->InAt(1);
3740 bool is_div = instruction->IsDiv();
3741
3742 switch (instruction->GetResultType()) {
3743 case DataType::Type::kInt32: {
3744 DCHECK_EQ(EAX, first.AsRegister<Register>());
3745 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3746
3747 if (second.IsConstant()) {
3748 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3749
3750 if (imm == 0) {
3751 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3752 } else if (imm == 1 || imm == -1) {
3753 DivRemOneOrMinusOne(instruction);
3754 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3755 if (is_div) {
3756 DivByPowerOfTwo(instruction->AsDiv());
3757 } else {
3758 RemByPowerOfTwo(instruction->AsRem());
3759 }
3760 } else {
3761 DCHECK(imm <= -2 || imm >= 2);
3762 GenerateDivRemWithAnyConstant(instruction);
3763 }
3764 } else {
3765 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3766 instruction, out.AsRegister<Register>(), is_div);
3767 codegen_->AddSlowPath(slow_path);
3768
3769 Register second_reg = second.AsRegister<Register>();
3770 // 0x80000000/-1 triggers an arithmetic exception!
3771 // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
3772 // it's safe to just use negl instead of more complex comparisons.
3773
3774 __ cmpl(second_reg, Immediate(-1));
3775 __ j(kEqual, slow_path->GetEntryLabel());
3776
3777 // edx:eax <- sign-extended of eax
3778 __ cdq();
3779 // eax = quotient, edx = remainder
3780 __ idivl(second_reg);
3781 __ Bind(slow_path->GetExitLabel());
3782 }
3783 break;
3784 }
3785
3786 case DataType::Type::kInt64: {
3787 InvokeRuntimeCallingConvention calling_convention;
3788 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3789 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3790 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3791 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3792 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3793 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3794
3795 if (is_div) {
3796 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3797 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3798 } else {
3799 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3800 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3801 }
3802 break;
3803 }
3804
3805 default:
3806 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3807 }
3808 }
3809
VisitDiv(HDiv * div)3810 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3811 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3812 ? LocationSummary::kCallOnMainOnly
3813 : LocationSummary::kNoCall;
3814 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3815
3816 switch (div->GetResultType()) {
3817 case DataType::Type::kInt32: {
3818 locations->SetInAt(0, Location::RegisterLocation(EAX));
3819 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3820 locations->SetOut(Location::SameAsFirstInput());
3821 // Intel uses edx:eax as the dividend.
3822 locations->AddTemp(Location::RegisterLocation(EDX));
3823 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3824 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3825 // output and request another temp.
3826 if (div->InputAt(1)->IsIntConstant()) {
3827 locations->AddTemp(Location::RequiresRegister());
3828 }
3829 break;
3830 }
3831 case DataType::Type::kInt64: {
3832 InvokeRuntimeCallingConvention calling_convention;
3833 locations->SetInAt(0, Location::RegisterPairLocation(
3834 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3835 locations->SetInAt(1, Location::RegisterPairLocation(
3836 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3837 // Runtime helper puts the result in EAX, EDX.
3838 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3839 break;
3840 }
3841 case DataType::Type::kFloat32:
3842 case DataType::Type::kFloat64: {
3843 locations->SetInAt(0, Location::RequiresFpuRegister());
3844 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3845 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3846 } else if (div->InputAt(1)->IsConstant()) {
3847 locations->SetInAt(1, Location::RequiresFpuRegister());
3848 } else {
3849 locations->SetInAt(1, Location::Any());
3850 }
3851 locations->SetOut(Location::SameAsFirstInput());
3852 break;
3853 }
3854
3855 default:
3856 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3857 }
3858 }
3859
VisitDiv(HDiv * div)3860 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3861 LocationSummary* locations = div->GetLocations();
3862 Location first = locations->InAt(0);
3863 Location second = locations->InAt(1);
3864
3865 switch (div->GetResultType()) {
3866 case DataType::Type::kInt32:
3867 case DataType::Type::kInt64: {
3868 GenerateDivRemIntegral(div);
3869 break;
3870 }
3871
3872 case DataType::Type::kFloat32: {
3873 if (second.IsFpuRegister()) {
3874 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3875 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3876 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3877 DCHECK(const_area->IsEmittedAtUseSite());
3878 __ divss(first.AsFpuRegister<XmmRegister>(),
3879 codegen_->LiteralFloatAddress(
3880 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3881 const_area->GetBaseMethodAddress(),
3882 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3883 } else {
3884 DCHECK(second.IsStackSlot());
3885 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3886 }
3887 break;
3888 }
3889
3890 case DataType::Type::kFloat64: {
3891 if (second.IsFpuRegister()) {
3892 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3893 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3894 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3895 DCHECK(const_area->IsEmittedAtUseSite());
3896 __ divsd(first.AsFpuRegister<XmmRegister>(),
3897 codegen_->LiteralDoubleAddress(
3898 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3899 const_area->GetBaseMethodAddress(),
3900 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3901 } else {
3902 DCHECK(second.IsDoubleStackSlot());
3903 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3904 }
3905 break;
3906 }
3907
3908 default:
3909 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3910 }
3911 }
3912
VisitRem(HRem * rem)3913 void LocationsBuilderX86::VisitRem(HRem* rem) {
3914 DataType::Type type = rem->GetResultType();
3915
3916 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3917 ? LocationSummary::kCallOnMainOnly
3918 : LocationSummary::kNoCall;
3919 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3920
3921 switch (type) {
3922 case DataType::Type::kInt32: {
3923 locations->SetInAt(0, Location::RegisterLocation(EAX));
3924 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3925 locations->SetOut(Location::RegisterLocation(EDX));
3926 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3927 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3928 // output and request another temp.
3929 if (rem->InputAt(1)->IsIntConstant()) {
3930 locations->AddTemp(Location::RequiresRegister());
3931 }
3932 break;
3933 }
3934 case DataType::Type::kInt64: {
3935 InvokeRuntimeCallingConvention calling_convention;
3936 locations->SetInAt(0, Location::RegisterPairLocation(
3937 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3938 locations->SetInAt(1, Location::RegisterPairLocation(
3939 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3940 // Runtime helper puts the result in EAX, EDX.
3941 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3942 break;
3943 }
3944 case DataType::Type::kFloat64:
3945 case DataType::Type::kFloat32: {
3946 locations->SetInAt(0, Location::Any());
3947 locations->SetInAt(1, Location::Any());
3948 locations->SetOut(Location::RequiresFpuRegister());
3949 locations->AddTemp(Location::RegisterLocation(EAX));
3950 break;
3951 }
3952
3953 default:
3954 LOG(FATAL) << "Unexpected rem type " << type;
3955 }
3956 }
3957
VisitRem(HRem * rem)3958 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3959 DataType::Type type = rem->GetResultType();
3960 switch (type) {
3961 case DataType::Type::kInt32:
3962 case DataType::Type::kInt64: {
3963 GenerateDivRemIntegral(rem);
3964 break;
3965 }
3966 case DataType::Type::kFloat32:
3967 case DataType::Type::kFloat64: {
3968 GenerateRemFP(rem);
3969 break;
3970 }
3971 default:
3972 LOG(FATAL) << "Unexpected rem type " << type;
3973 }
3974 }
3975
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)3976 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
3977 LocationSummary* locations = new (allocator) LocationSummary(minmax);
3978 switch (minmax->GetResultType()) {
3979 case DataType::Type::kInt32:
3980 locations->SetInAt(0, Location::RequiresRegister());
3981 locations->SetInAt(1, Location::RequiresRegister());
3982 locations->SetOut(Location::SameAsFirstInput());
3983 break;
3984 case DataType::Type::kInt64:
3985 locations->SetInAt(0, Location::RequiresRegister());
3986 locations->SetInAt(1, Location::RequiresRegister());
3987 locations->SetOut(Location::SameAsFirstInput());
3988 // Register to use to perform a long subtract to set cc.
3989 locations->AddTemp(Location::RequiresRegister());
3990 break;
3991 case DataType::Type::kFloat32:
3992 locations->SetInAt(0, Location::RequiresFpuRegister());
3993 locations->SetInAt(1, Location::RequiresFpuRegister());
3994 locations->SetOut(Location::SameAsFirstInput());
3995 locations->AddTemp(Location::RequiresRegister());
3996 break;
3997 case DataType::Type::kFloat64:
3998 locations->SetInAt(0, Location::RequiresFpuRegister());
3999 locations->SetInAt(1, Location::RequiresFpuRegister());
4000 locations->SetOut(Location::SameAsFirstInput());
4001 break;
4002 default:
4003 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4004 }
4005 }
4006
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4007 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4008 bool is_min,
4009 DataType::Type type) {
4010 Location op1_loc = locations->InAt(0);
4011 Location op2_loc = locations->InAt(1);
4012
4013 // Shortcut for same input locations.
4014 if (op1_loc.Equals(op2_loc)) {
4015 // Can return immediately, as op1_loc == out_loc.
4016 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4017 // a copy here.
4018 DCHECK(locations->Out().Equals(op1_loc));
4019 return;
4020 }
4021
4022 if (type == DataType::Type::kInt64) {
4023 // Need to perform a subtract to get the sign right.
4024 // op1 is already in the same location as the output.
4025 Location output = locations->Out();
4026 Register output_lo = output.AsRegisterPairLow<Register>();
4027 Register output_hi = output.AsRegisterPairHigh<Register>();
4028
4029 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4030 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4031
4032 // The comparison is performed by subtracting the second operand from
4033 // the first operand and then setting the status flags in the same
4034 // manner as the SUB instruction."
4035 __ cmpl(output_lo, op2_lo);
4036
4037 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4038 Register temp = locations->GetTemp(0).AsRegister<Register>();
4039 __ movl(temp, output_hi);
4040 __ sbbl(temp, op2_hi);
4041
4042 // Now the condition code is correct.
4043 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4044 __ cmovl(cond, output_lo, op2_lo);
4045 __ cmovl(cond, output_hi, op2_hi);
4046 } else {
4047 DCHECK_EQ(type, DataType::Type::kInt32);
4048 Register out = locations->Out().AsRegister<Register>();
4049 Register op2 = op2_loc.AsRegister<Register>();
4050
4051 // (out := op1)
4052 // out <=? op2
4053 // if out is min jmp done
4054 // out := op2
4055 // done:
4056
4057 __ cmpl(out, op2);
4058 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4059 __ cmovl(cond, out, op2);
4060 }
4061 }
4062
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4063 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4064 bool is_min,
4065 DataType::Type type) {
4066 Location op1_loc = locations->InAt(0);
4067 Location op2_loc = locations->InAt(1);
4068 Location out_loc = locations->Out();
4069 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4070
4071 // Shortcut for same input locations.
4072 if (op1_loc.Equals(op2_loc)) {
4073 DCHECK(out_loc.Equals(op1_loc));
4074 return;
4075 }
4076
4077 // (out := op1)
4078 // out <=? op2
4079 // if Nan jmp Nan_label
4080 // if out is min jmp done
4081 // if op2 is min jmp op2_label
4082 // handle -0/+0
4083 // jmp done
4084 // Nan_label:
4085 // out := NaN
4086 // op2_label:
4087 // out := op2
4088 // done:
4089 //
4090 // This removes one jmp, but needs to copy one input (op1) to out.
4091 //
4092 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4093
4094 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4095
4096 NearLabel nan, done, op2_label;
4097 if (type == DataType::Type::kFloat64) {
4098 __ ucomisd(out, op2);
4099 } else {
4100 DCHECK_EQ(type, DataType::Type::kFloat32);
4101 __ ucomiss(out, op2);
4102 }
4103
4104 __ j(Condition::kParityEven, &nan);
4105
4106 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4107 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4108
4109 // Handle 0.0/-0.0.
4110 if (is_min) {
4111 if (type == DataType::Type::kFloat64) {
4112 __ orpd(out, op2);
4113 } else {
4114 __ orps(out, op2);
4115 }
4116 } else {
4117 if (type == DataType::Type::kFloat64) {
4118 __ andpd(out, op2);
4119 } else {
4120 __ andps(out, op2);
4121 }
4122 }
4123 __ jmp(&done);
4124
4125 // NaN handling.
4126 __ Bind(&nan);
4127 if (type == DataType::Type::kFloat64) {
4128 // TODO: Use a constant from the constant table (requires extra input).
4129 __ LoadLongConstant(out, kDoubleNaN);
4130 } else {
4131 Register constant = locations->GetTemp(0).AsRegister<Register>();
4132 __ movl(constant, Immediate(kFloatNaN));
4133 __ movd(out, constant);
4134 }
4135 __ jmp(&done);
4136
4137 // out := op2;
4138 __ Bind(&op2_label);
4139 if (type == DataType::Type::kFloat64) {
4140 __ movsd(out, op2);
4141 } else {
4142 __ movss(out, op2);
4143 }
4144
4145 // Done.
4146 __ Bind(&done);
4147 }
4148
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4149 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4150 DataType::Type type = minmax->GetResultType();
4151 switch (type) {
4152 case DataType::Type::kInt32:
4153 case DataType::Type::kInt64:
4154 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4155 break;
4156 case DataType::Type::kFloat32:
4157 case DataType::Type::kFloat64:
4158 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4159 break;
4160 default:
4161 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4162 }
4163 }
4164
VisitMin(HMin * min)4165 void LocationsBuilderX86::VisitMin(HMin* min) {
4166 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4167 }
4168
VisitMin(HMin * min)4169 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4170 GenerateMinMax(min, /*is_min*/ true);
4171 }
4172
VisitMax(HMax * max)4173 void LocationsBuilderX86::VisitMax(HMax* max) {
4174 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4175 }
4176
VisitMax(HMax * max)4177 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4178 GenerateMinMax(max, /*is_min*/ false);
4179 }
4180
VisitAbs(HAbs * abs)4181 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4182 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4183 switch (abs->GetResultType()) {
4184 case DataType::Type::kInt32:
4185 locations->SetInAt(0, Location::RegisterLocation(EAX));
4186 locations->SetOut(Location::SameAsFirstInput());
4187 locations->AddTemp(Location::RegisterLocation(EDX));
4188 break;
4189 case DataType::Type::kInt64:
4190 locations->SetInAt(0, Location::RequiresRegister());
4191 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4192 locations->AddTemp(Location::RequiresRegister());
4193 break;
4194 case DataType::Type::kFloat32:
4195 locations->SetInAt(0, Location::RequiresFpuRegister());
4196 locations->SetOut(Location::SameAsFirstInput());
4197 locations->AddTemp(Location::RequiresFpuRegister());
4198 locations->AddTemp(Location::RequiresRegister());
4199 break;
4200 case DataType::Type::kFloat64:
4201 locations->SetInAt(0, Location::RequiresFpuRegister());
4202 locations->SetOut(Location::SameAsFirstInput());
4203 locations->AddTemp(Location::RequiresFpuRegister());
4204 break;
4205 default:
4206 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4207 }
4208 }
4209
VisitAbs(HAbs * abs)4210 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4211 LocationSummary* locations = abs->GetLocations();
4212 switch (abs->GetResultType()) {
4213 case DataType::Type::kInt32: {
4214 Register out = locations->Out().AsRegister<Register>();
4215 DCHECK_EQ(out, EAX);
4216 Register temp = locations->GetTemp(0).AsRegister<Register>();
4217 DCHECK_EQ(temp, EDX);
4218 // Sign extend EAX into EDX.
4219 __ cdq();
4220 // XOR EAX with sign.
4221 __ xorl(EAX, EDX);
4222 // Subtract out sign to correct.
4223 __ subl(EAX, EDX);
4224 // The result is in EAX.
4225 break;
4226 }
4227 case DataType::Type::kInt64: {
4228 Location input = locations->InAt(0);
4229 Register input_lo = input.AsRegisterPairLow<Register>();
4230 Register input_hi = input.AsRegisterPairHigh<Register>();
4231 Location output = locations->Out();
4232 Register output_lo = output.AsRegisterPairLow<Register>();
4233 Register output_hi = output.AsRegisterPairHigh<Register>();
4234 Register temp = locations->GetTemp(0).AsRegister<Register>();
4235 // Compute the sign into the temporary.
4236 __ movl(temp, input_hi);
4237 __ sarl(temp, Immediate(31));
4238 // Store the sign into the output.
4239 __ movl(output_lo, temp);
4240 __ movl(output_hi, temp);
4241 // XOR the input to the output.
4242 __ xorl(output_lo, input_lo);
4243 __ xorl(output_hi, input_hi);
4244 // Subtract the sign.
4245 __ subl(output_lo, temp);
4246 __ sbbl(output_hi, temp);
4247 break;
4248 }
4249 case DataType::Type::kFloat32: {
4250 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4251 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4252 Register constant = locations->GetTemp(1).AsRegister<Register>();
4253 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4254 __ movd(temp, constant);
4255 __ andps(out, temp);
4256 break;
4257 }
4258 case DataType::Type::kFloat64: {
4259 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4260 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4261 // TODO: Use a constant from the constant table (requires extra input).
4262 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4263 __ andpd(out, temp);
4264 break;
4265 }
4266 default:
4267 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4268 }
4269 }
4270
VisitDivZeroCheck(HDivZeroCheck * instruction)4271 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4272 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4273 switch (instruction->GetType()) {
4274 case DataType::Type::kBool:
4275 case DataType::Type::kUint8:
4276 case DataType::Type::kInt8:
4277 case DataType::Type::kUint16:
4278 case DataType::Type::kInt16:
4279 case DataType::Type::kInt32: {
4280 locations->SetInAt(0, Location::Any());
4281 break;
4282 }
4283 case DataType::Type::kInt64: {
4284 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4285 if (!instruction->IsConstant()) {
4286 locations->AddTemp(Location::RequiresRegister());
4287 }
4288 break;
4289 }
4290 default:
4291 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4292 }
4293 }
4294
VisitDivZeroCheck(HDivZeroCheck * instruction)4295 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4296 SlowPathCode* slow_path =
4297 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4298 codegen_->AddSlowPath(slow_path);
4299
4300 LocationSummary* locations = instruction->GetLocations();
4301 Location value = locations->InAt(0);
4302
4303 switch (instruction->GetType()) {
4304 case DataType::Type::kBool:
4305 case DataType::Type::kUint8:
4306 case DataType::Type::kInt8:
4307 case DataType::Type::kUint16:
4308 case DataType::Type::kInt16:
4309 case DataType::Type::kInt32: {
4310 if (value.IsRegister()) {
4311 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4312 __ j(kEqual, slow_path->GetEntryLabel());
4313 } else if (value.IsStackSlot()) {
4314 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4315 __ j(kEqual, slow_path->GetEntryLabel());
4316 } else {
4317 DCHECK(value.IsConstant()) << value;
4318 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4319 __ jmp(slow_path->GetEntryLabel());
4320 }
4321 }
4322 break;
4323 }
4324 case DataType::Type::kInt64: {
4325 if (value.IsRegisterPair()) {
4326 Register temp = locations->GetTemp(0).AsRegister<Register>();
4327 __ movl(temp, value.AsRegisterPairLow<Register>());
4328 __ orl(temp, value.AsRegisterPairHigh<Register>());
4329 __ j(kEqual, slow_path->GetEntryLabel());
4330 } else {
4331 DCHECK(value.IsConstant()) << value;
4332 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4333 __ jmp(slow_path->GetEntryLabel());
4334 }
4335 }
4336 break;
4337 }
4338 default:
4339 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4340 }
4341 }
4342
HandleShift(HBinaryOperation * op)4343 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4344 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4345
4346 LocationSummary* locations =
4347 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4348
4349 switch (op->GetResultType()) {
4350 case DataType::Type::kInt32:
4351 case DataType::Type::kInt64: {
4352 // Can't have Location::Any() and output SameAsFirstInput()
4353 locations->SetInAt(0, Location::RequiresRegister());
4354 // The shift count needs to be in CL or a constant.
4355 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4356 locations->SetOut(Location::SameAsFirstInput());
4357 break;
4358 }
4359 default:
4360 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4361 }
4362 }
4363
HandleShift(HBinaryOperation * op)4364 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4365 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4366
4367 LocationSummary* locations = op->GetLocations();
4368 Location first = locations->InAt(0);
4369 Location second = locations->InAt(1);
4370 DCHECK(first.Equals(locations->Out()));
4371
4372 switch (op->GetResultType()) {
4373 case DataType::Type::kInt32: {
4374 DCHECK(first.IsRegister());
4375 Register first_reg = first.AsRegister<Register>();
4376 if (second.IsRegister()) {
4377 Register second_reg = second.AsRegister<Register>();
4378 DCHECK_EQ(ECX, second_reg);
4379 if (op->IsShl()) {
4380 __ shll(first_reg, second_reg);
4381 } else if (op->IsShr()) {
4382 __ sarl(first_reg, second_reg);
4383 } else {
4384 __ shrl(first_reg, second_reg);
4385 }
4386 } else {
4387 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4388 if (shift == 0) {
4389 return;
4390 }
4391 Immediate imm(shift);
4392 if (op->IsShl()) {
4393 __ shll(first_reg, imm);
4394 } else if (op->IsShr()) {
4395 __ sarl(first_reg, imm);
4396 } else {
4397 __ shrl(first_reg, imm);
4398 }
4399 }
4400 break;
4401 }
4402 case DataType::Type::kInt64: {
4403 if (second.IsRegister()) {
4404 Register second_reg = second.AsRegister<Register>();
4405 DCHECK_EQ(ECX, second_reg);
4406 if (op->IsShl()) {
4407 GenerateShlLong(first, second_reg);
4408 } else if (op->IsShr()) {
4409 GenerateShrLong(first, second_reg);
4410 } else {
4411 GenerateUShrLong(first, second_reg);
4412 }
4413 } else {
4414 // Shift by a constant.
4415 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4416 // Nothing to do if the shift is 0, as the input is already the output.
4417 if (shift != 0) {
4418 if (op->IsShl()) {
4419 GenerateShlLong(first, shift);
4420 } else if (op->IsShr()) {
4421 GenerateShrLong(first, shift);
4422 } else {
4423 GenerateUShrLong(first, shift);
4424 }
4425 }
4426 }
4427 break;
4428 }
4429 default:
4430 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4431 }
4432 }
4433
GenerateShlLong(const Location & loc,int shift)4434 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4435 Register low = loc.AsRegisterPairLow<Register>();
4436 Register high = loc.AsRegisterPairHigh<Register>();
4437 if (shift == 1) {
4438 // This is just an addition.
4439 __ addl(low, low);
4440 __ adcl(high, high);
4441 } else if (shift == 32) {
4442 // Shift by 32 is easy. High gets low, and low gets 0.
4443 codegen_->EmitParallelMoves(
4444 loc.ToLow(),
4445 loc.ToHigh(),
4446 DataType::Type::kInt32,
4447 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4448 loc.ToLow(),
4449 DataType::Type::kInt32);
4450 } else if (shift > 32) {
4451 // Low part becomes 0. High part is low part << (shift-32).
4452 __ movl(high, low);
4453 __ shll(high, Immediate(shift - 32));
4454 __ xorl(low, low);
4455 } else {
4456 // Between 1 and 31.
4457 __ shld(high, low, Immediate(shift));
4458 __ shll(low, Immediate(shift));
4459 }
4460 }
4461
GenerateShlLong(const Location & loc,Register shifter)4462 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4463 NearLabel done;
4464 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4465 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4466 __ testl(shifter, Immediate(32));
4467 __ j(kEqual, &done);
4468 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4469 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4470 __ Bind(&done);
4471 }
4472
GenerateShrLong(const Location & loc,int shift)4473 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4474 Register low = loc.AsRegisterPairLow<Register>();
4475 Register high = loc.AsRegisterPairHigh<Register>();
4476 if (shift == 32) {
4477 // Need to copy the sign.
4478 DCHECK_NE(low, high);
4479 __ movl(low, high);
4480 __ sarl(high, Immediate(31));
4481 } else if (shift > 32) {
4482 DCHECK_NE(low, high);
4483 // High part becomes sign. Low part is shifted by shift - 32.
4484 __ movl(low, high);
4485 __ sarl(high, Immediate(31));
4486 __ sarl(low, Immediate(shift - 32));
4487 } else {
4488 // Between 1 and 31.
4489 __ shrd(low, high, Immediate(shift));
4490 __ sarl(high, Immediate(shift));
4491 }
4492 }
4493
GenerateShrLong(const Location & loc,Register shifter)4494 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4495 NearLabel done;
4496 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4497 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4498 __ testl(shifter, Immediate(32));
4499 __ j(kEqual, &done);
4500 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4501 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4502 __ Bind(&done);
4503 }
4504
GenerateUShrLong(const Location & loc,int shift)4505 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4506 Register low = loc.AsRegisterPairLow<Register>();
4507 Register high = loc.AsRegisterPairHigh<Register>();
4508 if (shift == 32) {
4509 // Shift by 32 is easy. Low gets high, and high gets 0.
4510 codegen_->EmitParallelMoves(
4511 loc.ToHigh(),
4512 loc.ToLow(),
4513 DataType::Type::kInt32,
4514 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4515 loc.ToHigh(),
4516 DataType::Type::kInt32);
4517 } else if (shift > 32) {
4518 // Low part is high >> (shift - 32). High part becomes 0.
4519 __ movl(low, high);
4520 __ shrl(low, Immediate(shift - 32));
4521 __ xorl(high, high);
4522 } else {
4523 // Between 1 and 31.
4524 __ shrd(low, high, Immediate(shift));
4525 __ shrl(high, Immediate(shift));
4526 }
4527 }
4528
GenerateUShrLong(const Location & loc,Register shifter)4529 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4530 NearLabel done;
4531 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4532 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4533 __ testl(shifter, Immediate(32));
4534 __ j(kEqual, &done);
4535 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4536 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4537 __ Bind(&done);
4538 }
4539
VisitRor(HRor * ror)4540 void LocationsBuilderX86::VisitRor(HRor* ror) {
4541 LocationSummary* locations =
4542 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4543
4544 switch (ror->GetResultType()) {
4545 case DataType::Type::kInt64:
4546 // Add the temporary needed.
4547 locations->AddTemp(Location::RequiresRegister());
4548 FALLTHROUGH_INTENDED;
4549 case DataType::Type::kInt32:
4550 locations->SetInAt(0, Location::RequiresRegister());
4551 // The shift count needs to be in CL (unless it is a constant).
4552 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4553 locations->SetOut(Location::SameAsFirstInput());
4554 break;
4555 default:
4556 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4557 UNREACHABLE();
4558 }
4559 }
4560
VisitRor(HRor * ror)4561 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4562 LocationSummary* locations = ror->GetLocations();
4563 Location first = locations->InAt(0);
4564 Location second = locations->InAt(1);
4565
4566 if (ror->GetResultType() == DataType::Type::kInt32) {
4567 Register first_reg = first.AsRegister<Register>();
4568 if (second.IsRegister()) {
4569 Register second_reg = second.AsRegister<Register>();
4570 __ rorl(first_reg, second_reg);
4571 } else {
4572 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4573 __ rorl(first_reg, imm);
4574 }
4575 return;
4576 }
4577
4578 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4579 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4580 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4581 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4582 if (second.IsRegister()) {
4583 Register second_reg = second.AsRegister<Register>();
4584 DCHECK_EQ(second_reg, ECX);
4585 __ movl(temp_reg, first_reg_hi);
4586 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4587 __ shrd(first_reg_lo, temp_reg, second_reg);
4588 __ movl(temp_reg, first_reg_hi);
4589 __ testl(second_reg, Immediate(32));
4590 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4591 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4592 } else {
4593 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4594 if (shift_amt == 0) {
4595 // Already fine.
4596 return;
4597 }
4598 if (shift_amt == 32) {
4599 // Just swap.
4600 __ movl(temp_reg, first_reg_lo);
4601 __ movl(first_reg_lo, first_reg_hi);
4602 __ movl(first_reg_hi, temp_reg);
4603 return;
4604 }
4605
4606 Immediate imm(shift_amt);
4607 // Save the constents of the low value.
4608 __ movl(temp_reg, first_reg_lo);
4609
4610 // Shift right into low, feeding bits from high.
4611 __ shrd(first_reg_lo, first_reg_hi, imm);
4612
4613 // Shift right into high, feeding bits from the original low.
4614 __ shrd(first_reg_hi, temp_reg, imm);
4615
4616 // Swap if needed.
4617 if (shift_amt > 32) {
4618 __ movl(temp_reg, first_reg_lo);
4619 __ movl(first_reg_lo, first_reg_hi);
4620 __ movl(first_reg_hi, temp_reg);
4621 }
4622 }
4623 }
4624
VisitShl(HShl * shl)4625 void LocationsBuilderX86::VisitShl(HShl* shl) {
4626 HandleShift(shl);
4627 }
4628
VisitShl(HShl * shl)4629 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4630 HandleShift(shl);
4631 }
4632
VisitShr(HShr * shr)4633 void LocationsBuilderX86::VisitShr(HShr* shr) {
4634 HandleShift(shr);
4635 }
4636
VisitShr(HShr * shr)4637 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4638 HandleShift(shr);
4639 }
4640
VisitUShr(HUShr * ushr)4641 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4642 HandleShift(ushr);
4643 }
4644
VisitUShr(HUShr * ushr)4645 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4646 HandleShift(ushr);
4647 }
4648
VisitNewInstance(HNewInstance * instruction)4649 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4650 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4651 instruction, LocationSummary::kCallOnMainOnly);
4652 locations->SetOut(Location::RegisterLocation(EAX));
4653 InvokeRuntimeCallingConvention calling_convention;
4654 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4655 }
4656
VisitNewInstance(HNewInstance * instruction)4657 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4658 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4659 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4660 DCHECK(!codegen_->IsLeafMethod());
4661 }
4662
VisitNewArray(HNewArray * instruction)4663 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4664 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4665 instruction, LocationSummary::kCallOnMainOnly);
4666 locations->SetOut(Location::RegisterLocation(EAX));
4667 InvokeRuntimeCallingConvention calling_convention;
4668 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4669 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4670 }
4671
VisitNewArray(HNewArray * instruction)4672 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4673 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4674 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4675 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4676 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4677 DCHECK(!codegen_->IsLeafMethod());
4678 }
4679
VisitParameterValue(HParameterValue * instruction)4680 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4681 LocationSummary* locations =
4682 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4683 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4684 if (location.IsStackSlot()) {
4685 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4686 } else if (location.IsDoubleStackSlot()) {
4687 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4688 }
4689 locations->SetOut(location);
4690 }
4691
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4692 void InstructionCodeGeneratorX86::VisitParameterValue(
4693 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4694 }
4695
VisitCurrentMethod(HCurrentMethod * instruction)4696 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4697 LocationSummary* locations =
4698 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4699 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4700 }
4701
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4702 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4703 }
4704
VisitClassTableGet(HClassTableGet * instruction)4705 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4706 LocationSummary* locations =
4707 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4708 locations->SetInAt(0, Location::RequiresRegister());
4709 locations->SetOut(Location::RequiresRegister());
4710 }
4711
VisitClassTableGet(HClassTableGet * instruction)4712 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4713 LocationSummary* locations = instruction->GetLocations();
4714 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4715 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4716 instruction->GetIndex(), kX86PointerSize).SizeValue();
4717 __ movl(locations->Out().AsRegister<Register>(),
4718 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4719 } else {
4720 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4721 instruction->GetIndex(), kX86PointerSize));
4722 __ movl(locations->Out().AsRegister<Register>(),
4723 Address(locations->InAt(0).AsRegister<Register>(),
4724 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4725 // temp = temp->GetImtEntryAt(method_offset);
4726 __ movl(locations->Out().AsRegister<Register>(),
4727 Address(locations->Out().AsRegister<Register>(), method_offset));
4728 }
4729 }
4730
VisitNot(HNot * not_)4731 void LocationsBuilderX86::VisitNot(HNot* not_) {
4732 LocationSummary* locations =
4733 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4734 locations->SetInAt(0, Location::RequiresRegister());
4735 locations->SetOut(Location::SameAsFirstInput());
4736 }
4737
VisitNot(HNot * not_)4738 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4739 LocationSummary* locations = not_->GetLocations();
4740 Location in = locations->InAt(0);
4741 Location out = locations->Out();
4742 DCHECK(in.Equals(out));
4743 switch (not_->GetResultType()) {
4744 case DataType::Type::kInt32:
4745 __ notl(out.AsRegister<Register>());
4746 break;
4747
4748 case DataType::Type::kInt64:
4749 __ notl(out.AsRegisterPairLow<Register>());
4750 __ notl(out.AsRegisterPairHigh<Register>());
4751 break;
4752
4753 default:
4754 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4755 }
4756 }
4757
VisitBooleanNot(HBooleanNot * bool_not)4758 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4759 LocationSummary* locations =
4760 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4761 locations->SetInAt(0, Location::RequiresRegister());
4762 locations->SetOut(Location::SameAsFirstInput());
4763 }
4764
VisitBooleanNot(HBooleanNot * bool_not)4765 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4766 LocationSummary* locations = bool_not->GetLocations();
4767 Location in = locations->InAt(0);
4768 Location out = locations->Out();
4769 DCHECK(in.Equals(out));
4770 __ xorl(out.AsRegister<Register>(), Immediate(1));
4771 }
4772
VisitCompare(HCompare * compare)4773 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4774 LocationSummary* locations =
4775 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4776 switch (compare->InputAt(0)->GetType()) {
4777 case DataType::Type::kBool:
4778 case DataType::Type::kUint8:
4779 case DataType::Type::kInt8:
4780 case DataType::Type::kUint16:
4781 case DataType::Type::kInt16:
4782 case DataType::Type::kInt32:
4783 case DataType::Type::kInt64: {
4784 locations->SetInAt(0, Location::RequiresRegister());
4785 locations->SetInAt(1, Location::Any());
4786 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4787 break;
4788 }
4789 case DataType::Type::kFloat32:
4790 case DataType::Type::kFloat64: {
4791 locations->SetInAt(0, Location::RequiresFpuRegister());
4792 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4793 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4794 } else if (compare->InputAt(1)->IsConstant()) {
4795 locations->SetInAt(1, Location::RequiresFpuRegister());
4796 } else {
4797 locations->SetInAt(1, Location::Any());
4798 }
4799 locations->SetOut(Location::RequiresRegister());
4800 break;
4801 }
4802 default:
4803 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4804 }
4805 }
4806
VisitCompare(HCompare * compare)4807 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4808 LocationSummary* locations = compare->GetLocations();
4809 Register out = locations->Out().AsRegister<Register>();
4810 Location left = locations->InAt(0);
4811 Location right = locations->InAt(1);
4812
4813 NearLabel less, greater, done;
4814 Condition less_cond = kLess;
4815
4816 switch (compare->InputAt(0)->GetType()) {
4817 case DataType::Type::kBool:
4818 case DataType::Type::kUint8:
4819 case DataType::Type::kInt8:
4820 case DataType::Type::kUint16:
4821 case DataType::Type::kInt16:
4822 case DataType::Type::kInt32: {
4823 codegen_->GenerateIntCompare(left, right);
4824 break;
4825 }
4826 case DataType::Type::kInt64: {
4827 Register left_low = left.AsRegisterPairLow<Register>();
4828 Register left_high = left.AsRegisterPairHigh<Register>();
4829 int32_t val_low = 0;
4830 int32_t val_high = 0;
4831 bool right_is_const = false;
4832
4833 if (right.IsConstant()) {
4834 DCHECK(right.GetConstant()->IsLongConstant());
4835 right_is_const = true;
4836 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4837 val_low = Low32Bits(val);
4838 val_high = High32Bits(val);
4839 }
4840
4841 if (right.IsRegisterPair()) {
4842 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4843 } else if (right.IsDoubleStackSlot()) {
4844 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4845 } else {
4846 DCHECK(right_is_const) << right;
4847 codegen_->Compare32BitValue(left_high, val_high);
4848 }
4849 __ j(kLess, &less); // Signed compare.
4850 __ j(kGreater, &greater); // Signed compare.
4851 if (right.IsRegisterPair()) {
4852 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4853 } else if (right.IsDoubleStackSlot()) {
4854 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4855 } else {
4856 DCHECK(right_is_const) << right;
4857 codegen_->Compare32BitValue(left_low, val_low);
4858 }
4859 less_cond = kBelow; // for CF (unsigned).
4860 break;
4861 }
4862 case DataType::Type::kFloat32: {
4863 GenerateFPCompare(left, right, compare, false);
4864 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4865 less_cond = kBelow; // for CF (floats).
4866 break;
4867 }
4868 case DataType::Type::kFloat64: {
4869 GenerateFPCompare(left, right, compare, true);
4870 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4871 less_cond = kBelow; // for CF (floats).
4872 break;
4873 }
4874 default:
4875 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4876 }
4877
4878 __ movl(out, Immediate(0));
4879 __ j(kEqual, &done);
4880 __ j(less_cond, &less);
4881
4882 __ Bind(&greater);
4883 __ movl(out, Immediate(1));
4884 __ jmp(&done);
4885
4886 __ Bind(&less);
4887 __ movl(out, Immediate(-1));
4888
4889 __ Bind(&done);
4890 }
4891
VisitPhi(HPhi * instruction)4892 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4893 LocationSummary* locations =
4894 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4895 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4896 locations->SetInAt(i, Location::Any());
4897 }
4898 locations->SetOut(Location::Any());
4899 }
4900
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)4901 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4902 LOG(FATAL) << "Unreachable";
4903 }
4904
GenerateMemoryBarrier(MemBarrierKind kind)4905 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4906 /*
4907 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4908 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4909 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4910 */
4911 switch (kind) {
4912 case MemBarrierKind::kAnyAny: {
4913 MemoryFence();
4914 break;
4915 }
4916 case MemBarrierKind::kAnyStore:
4917 case MemBarrierKind::kLoadAny:
4918 case MemBarrierKind::kStoreStore: {
4919 // nop
4920 break;
4921 }
4922 case MemBarrierKind::kNTStoreStore:
4923 // Non-Temporal Store/Store needs an explicit fence.
4924 MemoryFence(/* non-temporal= */ true);
4925 break;
4926 }
4927 }
4928
// Returns the dispatch info to use for a static or direct invoke. This code generator
// accepts the desired dispatch unchanged; `method` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  HInvokeStaticOrDirect::DispatchInfo supported = desired_dispatch_info;
  return supported;
}
4934
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)4935 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4936 Register temp) {
4937 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4938 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4939 if (!invoke->GetLocations()->Intrinsified()) {
4940 return location.AsRegister<Register>();
4941 }
4942 // For intrinsics we allow any location, so it may be on the stack.
4943 if (!location.IsRegister()) {
4944 __ movl(temp, Address(ESP, location.GetStackIndex()));
4945 return temp;
4946 }
4947 // For register locations, check if the register was saved. If so, get it from the stack.
4948 // Note: There is a chance that the register was saved but not overwritten, so we could
4949 // save one load. However, since this is just an intrinsic slow path we prefer this
4950 // simple and more robust approach rather that trying to determine if that's the case.
4951 SlowPathCode* slow_path = GetCurrentSlowPath();
4952 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
4953 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4954 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4955 __ movl(temp, Address(ESP, stack_offset));
4956 return temp;
4957 }
4958 return location.AsRegister<Register>();
4959 }
4960
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)4961 void CodeGeneratorX86::GenerateStaticOrDirectCall(
4962 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4963 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
4964 switch (invoke->GetMethodLoadKind()) {
4965 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4966 // temp = thread->string_init_entrypoint
4967 uint32_t offset =
4968 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4969 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
4970 break;
4971 }
4972 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4973 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4974 break;
4975 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4976 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4977 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4978 temp.AsRegister<Register>());
4979 __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
4980 RecordBootImageMethodPatch(invoke);
4981 break;
4982 }
4983 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4984 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4985 temp.AsRegister<Register>());
4986 __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4987 RecordBootImageRelRoPatch(
4988 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
4989 GetBootImageOffset(invoke));
4990 break;
4991 }
4992 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4993 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4994 temp.AsRegister<Register>());
4995 __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4996 RecordMethodBssEntryPatch(invoke);
4997 // No need for memory fence, thanks to the x86 memory model.
4998 break;
4999 }
5000 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
5001 __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
5002 break;
5003 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
5004 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5005 return; // No code pointer retrieval; the runtime performs the call directly.
5006 }
5007 }
5008
5009 switch (invoke->GetCodePtrLocation()) {
5010 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
5011 __ call(GetFrameEntryLabel());
5012 break;
5013 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
5014 // (callee_method + offset_of_quick_compiled_code)()
5015 __ call(Address(callee_method.AsRegister<Register>(),
5016 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5017 kX86PointerSize).Int32Value()));
5018 break;
5019 }
5020 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5021
5022 DCHECK(!IsLeafMethod());
5023 }
5024
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5025 void CodeGeneratorX86::GenerateVirtualCall(
5026 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5027 Register temp = temp_in.AsRegister<Register>();
5028 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5029 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5030
5031 // Use the calling convention instead of the location of the receiver, as
5032 // intrinsics may have put the receiver in a different register. In the intrinsics
5033 // slow path, the arguments have been moved to the right place, so here we are
5034 // guaranteed that the receiver is the first register of the calling convention.
5035 InvokeDexCallingConvention calling_convention;
5036 Register receiver = calling_convention.GetRegisterAt(0);
5037 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5038 // /* HeapReference<Class> */ temp = receiver->klass_
5039 __ movl(temp, Address(receiver, class_offset));
5040 MaybeRecordImplicitNullCheck(invoke);
5041 // Instead of simply (possibly) unpoisoning `temp` here, we should
5042 // emit a read barrier for the previous class reference load.
5043 // However this is not required in practice, as this is an
5044 // intermediate/temporary reference and because the current
5045 // concurrent copying collector keeps the from-space memory
5046 // intact/accessible until the end of the marking phase (the
5047 // concurrent copying collector may not in the future).
5048 __ MaybeUnpoisonHeapReference(temp);
5049
5050 MaybeGenerateInlineCacheCheck(invoke, temp);
5051
5052 // temp = temp->GetMethodAt(method_offset);
5053 __ movl(temp, Address(temp, method_offset));
5054 // call temp->GetEntryPoint();
5055 __ call(Address(
5056 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5057 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5058 }
5059
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5060 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5061 uint32_t intrinsic_data) {
5062 boot_image_other_patches_.emplace_back(
5063 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5064 __ Bind(&boot_image_other_patches_.back().label);
5065 }
5066
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5067 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5068 uint32_t boot_image_offset) {
5069 boot_image_other_patches_.emplace_back(
5070 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5071 __ Bind(&boot_image_other_patches_.back().label);
5072 }
5073
RecordBootImageMethodPatch(HInvokeStaticOrDirect * invoke)5074 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
5075 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5076 HX86ComputeBaseMethodAddress* method_address =
5077 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5078 boot_image_method_patches_.emplace_back(
5079 method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
5080 __ Bind(&boot_image_method_patches_.back().label);
5081 }
5082
RecordMethodBssEntryPatch(HInvokeStaticOrDirect * invoke)5083 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
5084 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5085 HX86ComputeBaseMethodAddress* method_address =
5086 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5087 // Add the patch entry and bind its label at the end of the instruction.
5088 method_bss_entry_patches_.emplace_back(
5089 method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
5090 __ Bind(&method_bss_entry_patches_.back().label);
5091 }
5092
RecordBootImageTypePatch(HLoadClass * load_class)5093 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5094 HX86ComputeBaseMethodAddress* method_address =
5095 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5096 boot_image_type_patches_.emplace_back(
5097 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5098 __ Bind(&boot_image_type_patches_.back().label);
5099 }
5100
NewTypeBssEntryPatch(HLoadClass * load_class)5101 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5102 HX86ComputeBaseMethodAddress* method_address =
5103 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5104 type_bss_entry_patches_.emplace_back(
5105 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5106 return &type_bss_entry_patches_.back().label;
5107 }
5108
RecordBootImageStringPatch(HLoadString * load_string)5109 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5110 HX86ComputeBaseMethodAddress* method_address =
5111 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5112 boot_image_string_patches_.emplace_back(
5113 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5114 __ Bind(&boot_image_string_patches_.back().label);
5115 }
5116
NewStringBssEntryPatch(HLoadString * load_string)5117 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5118 HX86ComputeBaseMethodAddress* method_address =
5119 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5120 string_bss_entry_patches_.emplace_back(
5121 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5122 return &string_bss_entry_patches_.back().label;
5123 }
5124
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5125 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5126 uint32_t boot_image_reference,
5127 HInvokeStaticOrDirect* invoke) {
5128 if (GetCompilerOptions().IsBootImage()) {
5129 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5130 HX86ComputeBaseMethodAddress* method_address =
5131 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5132 DCHECK(method_address != nullptr);
5133 Register method_address_reg =
5134 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5135 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5136 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5137 } else if (GetCompilerOptions().GetCompilePic()) {
5138 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5139 HX86ComputeBaseMethodAddress* method_address =
5140 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5141 DCHECK(method_address != nullptr);
5142 Register method_address_reg =
5143 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5144 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5145 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5146 } else {
5147 DCHECK(Runtime::Current()->UseJitCompilation());
5148 gc::Heap* heap = Runtime::Current()->GetHeap();
5149 DCHECK(!heap->GetBootImageSpaces().empty());
5150 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5151 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5152 }
5153 }
5154
AllocateInstanceForIntrinsic(HInvokeStaticOrDirect * invoke,uint32_t boot_image_offset)5155 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
5156 uint32_t boot_image_offset) {
5157 DCHECK(invoke->IsStatic());
5158 InvokeRuntimeCallingConvention calling_convention;
5159 Register argument = calling_convention.GetRegisterAt(0);
5160 if (GetCompilerOptions().IsBootImage()) {
5161 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
5162 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5163 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5164 HX86ComputeBaseMethodAddress* method_address =
5165 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5166 DCHECK(method_address != nullptr);
5167 Register method_address_reg =
5168 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5169 __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5170 MethodReference target_method = invoke->GetTargetMethod();
5171 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5172 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5173 __ Bind(&boot_image_type_patches_.back().label);
5174 } else {
5175 LoadBootImageAddress(argument, boot_image_offset, invoke);
5176 }
5177 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
5178 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5179 }
5180
// Patch labels are bound at the end of the "movl" (or other) instruction, whereas the
// literal offset of a patch has to designate the embedded constant, which occupies the
// last 4 bytes of that instruction. This is the distance between the two.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment{4u};
5184
5185 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5186 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5187 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5188 ArenaVector<linker::LinkerPatch>* linker_patches) {
5189 for (const X86PcRelativePatchInfo& info : infos) {
5190 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5191 linker_patches->push_back(Factory(literal_offset,
5192 info.target_dex_file,
5193 GetMethodAddressOffset(info.method_address),
5194 info.offset_or_index));
5195 }
5196 }
5197
5198 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5199 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5200 const DexFile* target_dex_file,
5201 uint32_t pc_insn_offset,
5202 uint32_t boot_image_offset) {
5203 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5204 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5205 }
5206
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5207 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5208 DCHECK(linker_patches->empty());
5209 size_t size =
5210 boot_image_method_patches_.size() +
5211 method_bss_entry_patches_.size() +
5212 boot_image_type_patches_.size() +
5213 type_bss_entry_patches_.size() +
5214 boot_image_string_patches_.size() +
5215 string_bss_entry_patches_.size() +
5216 boot_image_other_patches_.size();
5217 linker_patches->reserve(size);
5218 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5219 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5220 boot_image_method_patches_, linker_patches);
5221 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5222 boot_image_type_patches_, linker_patches);
5223 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5224 boot_image_string_patches_, linker_patches);
5225 } else {
5226 DCHECK(boot_image_method_patches_.empty());
5227 DCHECK(boot_image_type_patches_.empty());
5228 DCHECK(boot_image_string_patches_.empty());
5229 }
5230 if (GetCompilerOptions().IsBootImage()) {
5231 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5232 boot_image_other_patches_, linker_patches);
5233 } else {
5234 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5235 boot_image_other_patches_, linker_patches);
5236 }
5237 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5238 method_bss_entry_patches_, linker_patches);
5239 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5240 type_bss_entry_patches_, linker_patches);
5241 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5242 string_bss_entry_patches_, linker_patches);
5243 DCHECK_EQ(size, linker_patches->size());
5244 }
5245
MarkGCCard(Register temp,Register card,Register object,Register value,bool value_can_be_null)5246 void CodeGeneratorX86::MarkGCCard(Register temp,
5247 Register card,
5248 Register object,
5249 Register value,
5250 bool value_can_be_null) {
5251 NearLabel is_null;
5252 if (value_can_be_null) {
5253 __ testl(value, value);
5254 __ j(kEqual, &is_null);
5255 }
5256 // Load the address of the card table into `card`.
5257 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5258 // Calculate the offset (in the card table) of the card corresponding to
5259 // `object`.
5260 __ movl(temp, object);
5261 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5262 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5263 // `object`'s card.
5264 //
5265 // Register `card` contains the address of the card table. Note that the card
5266 // table's base is biased during its creation so that it always starts at an
5267 // address whose least-significant byte is equal to `kCardDirty` (see
5268 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5269 // below writes the `kCardDirty` (byte) value into the `object`'s card
5270 // (located at `card + object >> kCardShift`).
5271 //
5272 // This dual use of the value in register `card` (1. to calculate the location
5273 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5274 // (no need to explicitly load `kCardDirty` as an immediate value).
5275 __ movb(Address(temp, card, TIMES_1, 0),
5276 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5277 if (value_can_be_null) {
5278 __ Bind(&is_null);
5279 }
5280 }
5281
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5282 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5283 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5284
5285 bool object_field_get_with_read_barrier =
5286 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5287 LocationSummary* locations =
5288 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5289 kEmitCompilerReadBarrier
5290 ? LocationSummary::kCallOnSlowPath
5291 : LocationSummary::kNoCall);
5292 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5293 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5294 }
5295 locations->SetInAt(0, Location::RequiresRegister());
5296
5297 if (DataType::IsFloatingPointType(instruction->GetType())) {
5298 locations->SetOut(Location::RequiresFpuRegister());
5299 } else {
5300 // The output overlaps in case of long: we don't want the low move
5301 // to overwrite the object's location. Likewise, in the case of
5302 // an object field get with read barriers enabled, we do not want
5303 // the move to overwrite the object's location, as we need it to emit
5304 // the read barrier.
5305 locations->SetOut(
5306 Location::RequiresRegister(),
5307 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
5308 Location::kOutputOverlap :
5309 Location::kNoOutputOverlap);
5310 }
5311
5312 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5313 // Long values can be loaded atomically into an XMM using movsd.
5314 // So we use an XMM register as a temp to achieve atomicity (first
5315 // load the temp into the XMM and then copy the XMM into the
5316 // output, 32 bits at a time).
5317 locations->AddTemp(Location::RequiresFpuRegister());
5318 }
5319 }
5320
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5321 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5322 const FieldInfo& field_info) {
5323 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5324
5325 LocationSummary* locations = instruction->GetLocations();
5326 Location base_loc = locations->InAt(0);
5327 Register base = base_loc.AsRegister<Register>();
5328 Location out = locations->Out();
5329 bool is_volatile = field_info.IsVolatile();
5330 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5331 DataType::Type load_type = instruction->GetType();
5332 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5333
5334 switch (load_type) {
5335 case DataType::Type::kBool:
5336 case DataType::Type::kUint8: {
5337 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5338 break;
5339 }
5340
5341 case DataType::Type::kInt8: {
5342 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5343 break;
5344 }
5345
5346 case DataType::Type::kUint16: {
5347 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5348 break;
5349 }
5350
5351 case DataType::Type::kInt16: {
5352 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5353 break;
5354 }
5355
5356 case DataType::Type::kInt32:
5357 __ movl(out.AsRegister<Register>(), Address(base, offset));
5358 break;
5359
5360 case DataType::Type::kReference: {
5361 // /* HeapReference<Object> */ out = *(base + offset)
5362 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5363 // Note that a potential implicit null check is handled in this
5364 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5365 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5366 instruction, out, base, offset, /* needs_null_check= */ true);
5367 if (is_volatile) {
5368 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5369 }
5370 } else {
5371 __ movl(out.AsRegister<Register>(), Address(base, offset));
5372 codegen_->MaybeRecordImplicitNullCheck(instruction);
5373 if (is_volatile) {
5374 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5375 }
5376 // If read barriers are enabled, emit read barriers other than
5377 // Baker's using a slow path (and also unpoison the loaded
5378 // reference, if heap poisoning is enabled).
5379 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5380 }
5381 break;
5382 }
5383
5384 case DataType::Type::kInt64: {
5385 if (is_volatile) {
5386 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5387 __ movsd(temp, Address(base, offset));
5388 codegen_->MaybeRecordImplicitNullCheck(instruction);
5389 __ movd(out.AsRegisterPairLow<Register>(), temp);
5390 __ psrlq(temp, Immediate(32));
5391 __ movd(out.AsRegisterPairHigh<Register>(), temp);
5392 } else {
5393 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5394 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5395 codegen_->MaybeRecordImplicitNullCheck(instruction);
5396 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5397 }
5398 break;
5399 }
5400
5401 case DataType::Type::kFloat32: {
5402 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5403 break;
5404 }
5405
5406 case DataType::Type::kFloat64: {
5407 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5408 break;
5409 }
5410
5411 case DataType::Type::kUint32:
5412 case DataType::Type::kUint64:
5413 case DataType::Type::kVoid:
5414 LOG(FATAL) << "Unreachable type " << load_type;
5415 UNREACHABLE();
5416 }
5417
5418 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5419 // Potential implicit null checks, in the case of reference or
5420 // long fields, are handled in the previous switch statement.
5421 } else {
5422 codegen_->MaybeRecordImplicitNullCheck(instruction);
5423 }
5424
5425 if (is_volatile) {
5426 if (load_type == DataType::Type::kReference) {
5427 // Memory barriers, in the case of references, are also handled
5428 // in the previous switch statement.
5429 } else {
5430 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5431 }
5432 }
5433 }
5434
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5435 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5436 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5437
5438 LocationSummary* locations =
5439 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5440 locations->SetInAt(0, Location::RequiresRegister());
5441 bool is_volatile = field_info.IsVolatile();
5442 DataType::Type field_type = field_info.GetFieldType();
5443 bool is_byte_type = DataType::Size(field_type) == 1u;
5444
5445 // The register allocator does not support multiple
5446 // inputs that die at entry with one in a specific register.
5447 if (is_byte_type) {
5448 // Ensure the value is in a byte register.
5449 locations->SetInAt(1, Location::RegisterLocation(EAX));
5450 } else if (DataType::IsFloatingPointType(field_type)) {
5451 if (is_volatile && field_type == DataType::Type::kFloat64) {
5452 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5453 locations->SetInAt(1, Location::RequiresFpuRegister());
5454 } else {
5455 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5456 }
5457 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5458 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5459 locations->SetInAt(1, Location::RequiresRegister());
5460
5461 // 64bits value can be atomically written to an address with movsd and an XMM register.
5462 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5463 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5464 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5465 // isolated cases when we need this it isn't worth adding the extra complexity.
5466 locations->AddTemp(Location::RequiresFpuRegister());
5467 locations->AddTemp(Location::RequiresFpuRegister());
5468 } else {
5469 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5470
5471 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5472 // Temporary registers for the write barrier.
5473 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
5474 // Ensure the card is in a byte register.
5475 locations->AddTemp(Location::RegisterLocation(ECX));
5476 }
5477 }
5478 }
5479
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5480 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5481 const FieldInfo& field_info,
5482 bool value_can_be_null) {
5483 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5484
5485 LocationSummary* locations = instruction->GetLocations();
5486 Register base = locations->InAt(0).AsRegister<Register>();
5487 Location value = locations->InAt(1);
5488 bool is_volatile = field_info.IsVolatile();
5489 DataType::Type field_type = field_info.GetFieldType();
5490 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5491 bool needs_write_barrier =
5492 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5493
5494 if (is_volatile) {
5495 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5496 }
5497
5498 bool maybe_record_implicit_null_check_done = false;
5499
5500 switch (field_type) {
5501 case DataType::Type::kBool:
5502 case DataType::Type::kUint8:
5503 case DataType::Type::kInt8: {
5504 __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
5505 break;
5506 }
5507
5508 case DataType::Type::kUint16:
5509 case DataType::Type::kInt16: {
5510 if (value.IsConstant()) {
5511 __ movw(Address(base, offset),
5512 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5513 } else {
5514 __ movw(Address(base, offset), value.AsRegister<Register>());
5515 }
5516 break;
5517 }
5518
5519 case DataType::Type::kInt32:
5520 case DataType::Type::kReference: {
5521 if (kPoisonHeapReferences && needs_write_barrier) {
5522 // Note that in the case where `value` is a null reference,
5523 // we do not enter this block, as the reference does not
5524 // need poisoning.
5525 DCHECK_EQ(field_type, DataType::Type::kReference);
5526 Register temp = locations->GetTemp(0).AsRegister<Register>();
5527 __ movl(temp, value.AsRegister<Register>());
5528 __ PoisonHeapReference(temp);
5529 __ movl(Address(base, offset), temp);
5530 } else if (value.IsConstant()) {
5531 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5532 __ movl(Address(base, offset), Immediate(v));
5533 } else {
5534 DCHECK(value.IsRegister()) << value;
5535 __ movl(Address(base, offset), value.AsRegister<Register>());
5536 }
5537 break;
5538 }
5539
5540 case DataType::Type::kInt64: {
5541 if (is_volatile) {
5542 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5543 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5544 __ movd(temp1, value.AsRegisterPairLow<Register>());
5545 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5546 __ punpckldq(temp1, temp2);
5547 __ movsd(Address(base, offset), temp1);
5548 codegen_->MaybeRecordImplicitNullCheck(instruction);
5549 } else if (value.IsConstant()) {
5550 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5551 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5552 codegen_->MaybeRecordImplicitNullCheck(instruction);
5553 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5554 } else {
5555 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5556 codegen_->MaybeRecordImplicitNullCheck(instruction);
5557 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5558 }
5559 maybe_record_implicit_null_check_done = true;
5560 break;
5561 }
5562
5563 case DataType::Type::kFloat32: {
5564 if (value.IsConstant()) {
5565 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5566 __ movl(Address(base, offset), Immediate(v));
5567 } else {
5568 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5569 }
5570 break;
5571 }
5572
5573 case DataType::Type::kFloat64: {
5574 if (value.IsConstant()) {
5575 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5576 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5577 codegen_->MaybeRecordImplicitNullCheck(instruction);
5578 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5579 maybe_record_implicit_null_check_done = true;
5580 } else {
5581 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5582 }
5583 break;
5584 }
5585
5586 case DataType::Type::kUint32:
5587 case DataType::Type::kUint64:
5588 case DataType::Type::kVoid:
5589 LOG(FATAL) << "Unreachable type " << field_type;
5590 UNREACHABLE();
5591 }
5592
5593 if (!maybe_record_implicit_null_check_done) {
5594 codegen_->MaybeRecordImplicitNullCheck(instruction);
5595 }
5596
5597 if (needs_write_barrier) {
5598 Register temp = locations->GetTemp(0).AsRegister<Register>();
5599 Register card = locations->GetTemp(1).AsRegister<Register>();
5600 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5601 }
5602
5603 if (is_volatile) {
5604 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5605 }
5606 }
5607
VisitStaticFieldGet(HStaticFieldGet * instruction)5608 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5609 HandleFieldGet(instruction, instruction->GetFieldInfo());
5610 }
5611
VisitStaticFieldGet(HStaticFieldGet * instruction)5612 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5613 HandleFieldGet(instruction, instruction->GetFieldInfo());
5614 }
5615
VisitStaticFieldSet(HStaticFieldSet * instruction)5616 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5617 HandleFieldSet(instruction, instruction->GetFieldInfo());
5618 }
5619
VisitStaticFieldSet(HStaticFieldSet * instruction)5620 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5621 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5622 }
5623
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5624 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5625 HandleFieldSet(instruction, instruction->GetFieldInfo());
5626 }
5627
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5628 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5629 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5630 }
5631
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5632 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5633 HandleFieldGet(instruction, instruction->GetFieldInfo());
5634 }
5635
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5636 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5637 HandleFieldGet(instruction, instruction->GetFieldInfo());
5638 }
5639
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5640 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5641 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
5642 }
5643
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5644 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5645 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
5646 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5647 }
5648
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5649 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5650 HUnresolvedInstanceFieldGet* instruction) {
5651 FieldAccessCallingConventionX86 calling_convention;
5652 codegen_->CreateUnresolvedFieldLocationSummary(
5653 instruction, instruction->GetFieldType(), calling_convention);
5654 }
5655
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5656 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5657 HUnresolvedInstanceFieldGet* instruction) {
5658 FieldAccessCallingConventionX86 calling_convention;
5659 codegen_->GenerateUnresolvedFieldAccess(instruction,
5660 instruction->GetFieldType(),
5661 instruction->GetFieldIndex(),
5662 instruction->GetDexPc(),
5663 calling_convention);
5664 }
5665
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5666 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5667 HUnresolvedInstanceFieldSet* instruction) {
5668 FieldAccessCallingConventionX86 calling_convention;
5669 codegen_->CreateUnresolvedFieldLocationSummary(
5670 instruction, instruction->GetFieldType(), calling_convention);
5671 }
5672
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5673 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5674 HUnresolvedInstanceFieldSet* instruction) {
5675 FieldAccessCallingConventionX86 calling_convention;
5676 codegen_->GenerateUnresolvedFieldAccess(instruction,
5677 instruction->GetFieldType(),
5678 instruction->GetFieldIndex(),
5679 instruction->GetDexPc(),
5680 calling_convention);
5681 }
5682
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5683 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5684 HUnresolvedStaticFieldGet* instruction) {
5685 FieldAccessCallingConventionX86 calling_convention;
5686 codegen_->CreateUnresolvedFieldLocationSummary(
5687 instruction, instruction->GetFieldType(), calling_convention);
5688 }
5689
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5690 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5691 HUnresolvedStaticFieldGet* instruction) {
5692 FieldAccessCallingConventionX86 calling_convention;
5693 codegen_->GenerateUnresolvedFieldAccess(instruction,
5694 instruction->GetFieldType(),
5695 instruction->GetFieldIndex(),
5696 instruction->GetDexPc(),
5697 calling_convention);
5698 }
5699
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5700 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5701 HUnresolvedStaticFieldSet* instruction) {
5702 FieldAccessCallingConventionX86 calling_convention;
5703 codegen_->CreateUnresolvedFieldLocationSummary(
5704 instruction, instruction->GetFieldType(), calling_convention);
5705 }
5706
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5707 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5708 HUnresolvedStaticFieldSet* instruction) {
5709 FieldAccessCallingConventionX86 calling_convention;
5710 codegen_->GenerateUnresolvedFieldAccess(instruction,
5711 instruction->GetFieldType(),
5712 instruction->GetFieldIndex(),
5713 instruction->GetDexPc(),
5714 calling_convention);
5715 }
5716
VisitNullCheck(HNullCheck * instruction)5717 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5718 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5719 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5720 ? Location::RequiresRegister()
5721 : Location::Any();
5722 locations->SetInAt(0, loc);
5723 }
5724
GenerateImplicitNullCheck(HNullCheck * instruction)5725 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5726 if (CanMoveNullCheckToUser(instruction)) {
5727 return;
5728 }
5729 LocationSummary* locations = instruction->GetLocations();
5730 Location obj = locations->InAt(0);
5731
5732 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5733 RecordPcInfo(instruction, instruction->GetDexPc());
5734 }
5735
GenerateExplicitNullCheck(HNullCheck * instruction)5736 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5737 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5738 AddSlowPath(slow_path);
5739
5740 LocationSummary* locations = instruction->GetLocations();
5741 Location obj = locations->InAt(0);
5742
5743 if (obj.IsRegister()) {
5744 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5745 } else if (obj.IsStackSlot()) {
5746 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5747 } else {
5748 DCHECK(obj.IsConstant()) << obj;
5749 DCHECK(obj.GetConstant()->IsNullConstant());
5750 __ jmp(slow_path->GetEntryLabel());
5751 return;
5752 }
5753 __ j(kEqual, slow_path->GetEntryLabel());
5754 }
5755
VisitNullCheck(HNullCheck * instruction)5756 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5757 codegen_->GenerateNullCheck(instruction);
5758 }
5759
VisitArrayGet(HArrayGet * instruction)5760 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5761 bool object_array_get_with_read_barrier =
5762 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5763 LocationSummary* locations =
5764 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5765 object_array_get_with_read_barrier
5766 ? LocationSummary::kCallOnSlowPath
5767 : LocationSummary::kNoCall);
5768 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5769 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5770 }
5771 locations->SetInAt(0, Location::RequiresRegister());
5772 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5773 if (DataType::IsFloatingPointType(instruction->GetType())) {
5774 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5775 } else {
5776 // The output overlaps in case of long: we don't want the low move
5777 // to overwrite the array's location. Likewise, in the case of an
5778 // object array get with read barriers enabled, we do not want the
5779 // move to overwrite the array's location, as we need it to emit
5780 // the read barrier.
5781 locations->SetOut(
5782 Location::RequiresRegister(),
5783 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5784 ? Location::kOutputOverlap
5785 : Location::kNoOutputOverlap);
5786 }
5787 }
5788
VisitArrayGet(HArrayGet * instruction)5789 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5790 LocationSummary* locations = instruction->GetLocations();
5791 Location obj_loc = locations->InAt(0);
5792 Register obj = obj_loc.AsRegister<Register>();
5793 Location index = locations->InAt(1);
5794 Location out_loc = locations->Out();
5795 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5796
5797 DataType::Type type = instruction->GetType();
5798 switch (type) {
5799 case DataType::Type::kBool:
5800 case DataType::Type::kUint8: {
5801 Register out = out_loc.AsRegister<Register>();
5802 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5803 break;
5804 }
5805
5806 case DataType::Type::kInt8: {
5807 Register out = out_loc.AsRegister<Register>();
5808 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5809 break;
5810 }
5811
5812 case DataType::Type::kUint16: {
5813 Register out = out_loc.AsRegister<Register>();
5814 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5815 // Branch cases into compressed and uncompressed for each index's type.
5816 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5817 NearLabel done, not_compressed;
5818 __ testb(Address(obj, count_offset), Immediate(1));
5819 codegen_->MaybeRecordImplicitNullCheck(instruction);
5820 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5821 "Expecting 0=compressed, 1=uncompressed");
5822 __ j(kNotZero, ¬_compressed);
5823 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5824 __ jmp(&done);
5825 __ Bind(¬_compressed);
5826 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5827 __ Bind(&done);
5828 } else {
5829 // Common case for charAt of array of char or when string compression's
5830 // feature is turned off.
5831 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5832 }
5833 break;
5834 }
5835
5836 case DataType::Type::kInt16: {
5837 Register out = out_loc.AsRegister<Register>();
5838 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5839 break;
5840 }
5841
5842 case DataType::Type::kInt32: {
5843 Register out = out_loc.AsRegister<Register>();
5844 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5845 break;
5846 }
5847
5848 case DataType::Type::kReference: {
5849 static_assert(
5850 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5851 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5852 // /* HeapReference<Object> */ out =
5853 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5854 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5855 // Note that a potential implicit null check is handled in this
5856 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5857 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5858 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5859 } else {
5860 Register out = out_loc.AsRegister<Register>();
5861 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5862 codegen_->MaybeRecordImplicitNullCheck(instruction);
5863 // If read barriers are enabled, emit read barriers other than
5864 // Baker's using a slow path (and also unpoison the loaded
5865 // reference, if heap poisoning is enabled).
5866 if (index.IsConstant()) {
5867 uint32_t offset =
5868 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5869 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5870 } else {
5871 codegen_->MaybeGenerateReadBarrierSlow(
5872 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5873 }
5874 }
5875 break;
5876 }
5877
5878 case DataType::Type::kInt64: {
5879 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5880 __ movl(out_loc.AsRegisterPairLow<Register>(),
5881 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5882 codegen_->MaybeRecordImplicitNullCheck(instruction);
5883 __ movl(out_loc.AsRegisterPairHigh<Register>(),
5884 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5885 break;
5886 }
5887
5888 case DataType::Type::kFloat32: {
5889 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5890 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5891 break;
5892 }
5893
5894 case DataType::Type::kFloat64: {
5895 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5896 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5897 break;
5898 }
5899
5900 case DataType::Type::kUint32:
5901 case DataType::Type::kUint64:
5902 case DataType::Type::kVoid:
5903 LOG(FATAL) << "Unreachable type " << type;
5904 UNREACHABLE();
5905 }
5906
5907 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
5908 // Potential implicit null checks, in the case of reference or
5909 // long arrays, are handled in the previous switch statement.
5910 } else {
5911 codegen_->MaybeRecordImplicitNullCheck(instruction);
5912 }
5913 }
5914
VisitArraySet(HArraySet * instruction)5915 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
5916 DataType::Type value_type = instruction->GetComponentType();
5917
5918 bool needs_write_barrier =
5919 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5920 bool needs_type_check = instruction->NeedsTypeCheck();
5921
5922 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5923 instruction,
5924 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5925
5926 bool is_byte_type = DataType::Size(value_type) == 1u;
5927 // We need the inputs to be different than the output in case of long operation.
5928 // In case of a byte operation, the register allocator does not support multiple
5929 // inputs that die at entry with one in a specific register.
5930 locations->SetInAt(0, Location::RequiresRegister());
5931 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5932 if (is_byte_type) {
5933 // Ensure the value is in a byte register.
5934 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
5935 } else if (DataType::IsFloatingPointType(value_type)) {
5936 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5937 } else {
5938 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5939 }
5940 if (needs_write_barrier) {
5941 // Temporary registers for the write barrier.
5942 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5943 // Ensure the card is in a byte register.
5944 locations->AddTemp(Location::RegisterLocation(ECX));
5945 }
5946 }
5947
VisitArraySet(HArraySet * instruction)5948 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
5949 LocationSummary* locations = instruction->GetLocations();
5950 Location array_loc = locations->InAt(0);
5951 Register array = array_loc.AsRegister<Register>();
5952 Location index = locations->InAt(1);
5953 Location value = locations->InAt(2);
5954 DataType::Type value_type = instruction->GetComponentType();
5955 bool needs_type_check = instruction->NeedsTypeCheck();
5956 bool needs_write_barrier =
5957 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5958
5959 switch (value_type) {
5960 case DataType::Type::kBool:
5961 case DataType::Type::kUint8:
5962 case DataType::Type::kInt8: {
5963 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5964 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
5965 if (value.IsRegister()) {
5966 __ movb(address, value.AsRegister<ByteRegister>());
5967 } else {
5968 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5969 }
5970 codegen_->MaybeRecordImplicitNullCheck(instruction);
5971 break;
5972 }
5973
5974 case DataType::Type::kUint16:
5975 case DataType::Type::kInt16: {
5976 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5977 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
5978 if (value.IsRegister()) {
5979 __ movw(address, value.AsRegister<Register>());
5980 } else {
5981 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5982 }
5983 codegen_->MaybeRecordImplicitNullCheck(instruction);
5984 break;
5985 }
5986
5987 case DataType::Type::kReference: {
5988 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5989 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5990
5991 if (!value.IsRegister()) {
5992 // Just setting null.
5993 DCHECK(instruction->InputAt(2)->IsNullConstant());
5994 DCHECK(value.IsConstant()) << value;
5995 __ movl(address, Immediate(0));
5996 codegen_->MaybeRecordImplicitNullCheck(instruction);
5997 DCHECK(!needs_write_barrier);
5998 DCHECK(!needs_type_check);
5999 break;
6000 }
6001
6002 DCHECK(needs_write_barrier);
6003 Register register_value = value.AsRegister<Register>();
6004 Location temp_loc = locations->GetTemp(0);
6005 Register temp = temp_loc.AsRegister<Register>();
6006
6007 bool can_value_be_null = instruction->GetValueCanBeNull();
6008 NearLabel do_store;
6009 if (can_value_be_null) {
6010 __ testl(register_value, register_value);
6011 __ j(kEqual, &do_store);
6012 }
6013
6014 SlowPathCode* slow_path = nullptr;
6015 if (needs_type_check) {
6016 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6017 codegen_->AddSlowPath(slow_path);
6018
6019 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6020 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6021 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6022
6023 // Note that when Baker read barriers are enabled, the type
6024 // checks are performed without read barriers. This is fine,
6025 // even in the case where a class object is in the from-space
6026 // after the flip, as a comparison involving such a type would
6027 // not produce a false positive; it may of course produce a
6028 // false negative, in which case we would take the ArraySet
6029 // slow path.
6030
6031 // /* HeapReference<Class> */ temp = array->klass_
6032 __ movl(temp, Address(array, class_offset));
6033 codegen_->MaybeRecordImplicitNullCheck(instruction);
6034 __ MaybeUnpoisonHeapReference(temp);
6035
6036 // /* HeapReference<Class> */ temp = temp->component_type_
6037 __ movl(temp, Address(temp, component_offset));
6038 // If heap poisoning is enabled, no need to unpoison `temp`
6039 // nor the object reference in `register_value->klass`, as
6040 // we are comparing two poisoned references.
6041 __ cmpl(temp, Address(register_value, class_offset));
6042
6043 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6044 NearLabel do_put;
6045 __ j(kEqual, &do_put);
6046 // If heap poisoning is enabled, the `temp` reference has
6047 // not been unpoisoned yet; unpoison it now.
6048 __ MaybeUnpoisonHeapReference(temp);
6049
6050 // If heap poisoning is enabled, no need to unpoison the
6051 // heap reference loaded below, as it is only used for a
6052 // comparison with null.
6053 __ cmpl(Address(temp, super_offset), Immediate(0));
6054 __ j(kNotEqual, slow_path->GetEntryLabel());
6055 __ Bind(&do_put);
6056 } else {
6057 __ j(kNotEqual, slow_path->GetEntryLabel());
6058 }
6059 }
6060
6061 Register card = locations->GetTemp(1).AsRegister<Register>();
6062 codegen_->MarkGCCard(
6063 temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6064
6065 if (can_value_be_null) {
6066 DCHECK(do_store.IsLinked());
6067 __ Bind(&do_store);
6068 }
6069
6070 Register source = register_value;
6071 if (kPoisonHeapReferences) {
6072 __ movl(temp, register_value);
6073 __ PoisonHeapReference(temp);
6074 source = temp;
6075 }
6076
6077 __ movl(address, source);
6078
6079 if (can_value_be_null || !needs_type_check) {
6080 codegen_->MaybeRecordImplicitNullCheck(instruction);
6081 }
6082
6083 if (slow_path != nullptr) {
6084 __ Bind(slow_path->GetExitLabel());
6085 }
6086
6087 break;
6088 }
6089
6090 case DataType::Type::kInt32: {
6091 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6092 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6093 if (value.IsRegister()) {
6094 __ movl(address, value.AsRegister<Register>());
6095 } else {
6096 DCHECK(value.IsConstant()) << value;
6097 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6098 __ movl(address, Immediate(v));
6099 }
6100 codegen_->MaybeRecordImplicitNullCheck(instruction);
6101 break;
6102 }
6103
6104 case DataType::Type::kInt64: {
6105 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6106 if (value.IsRegisterPair()) {
6107 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6108 value.AsRegisterPairLow<Register>());
6109 codegen_->MaybeRecordImplicitNullCheck(instruction);
6110 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6111 value.AsRegisterPairHigh<Register>());
6112 } else {
6113 DCHECK(value.IsConstant());
6114 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6115 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6116 Immediate(Low32Bits(val)));
6117 codegen_->MaybeRecordImplicitNullCheck(instruction);
6118 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6119 Immediate(High32Bits(val)));
6120 }
6121 break;
6122 }
6123
6124 case DataType::Type::kFloat32: {
6125 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6126 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6127 if (value.IsFpuRegister()) {
6128 __ movss(address, value.AsFpuRegister<XmmRegister>());
6129 } else {
6130 DCHECK(value.IsConstant());
6131 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6132 __ movl(address, Immediate(v));
6133 }
6134 codegen_->MaybeRecordImplicitNullCheck(instruction);
6135 break;
6136 }
6137
6138 case DataType::Type::kFloat64: {
6139 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6140 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6141 if (value.IsFpuRegister()) {
6142 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6143 } else {
6144 DCHECK(value.IsConstant());
6145 Address address_hi =
6146 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6147 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6148 __ movl(address, Immediate(Low32Bits(v)));
6149 codegen_->MaybeRecordImplicitNullCheck(instruction);
6150 __ movl(address_hi, Immediate(High32Bits(v)));
6151 }
6152 break;
6153 }
6154
6155 case DataType::Type::kUint32:
6156 case DataType::Type::kUint64:
6157 case DataType::Type::kVoid:
6158 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6159 UNREACHABLE();
6160 }
6161 }
6162
VisitArrayLength(HArrayLength * instruction)6163 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6164 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6165 locations->SetInAt(0, Location::RequiresRegister());
6166 if (!instruction->IsEmittedAtUseSite()) {
6167 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6168 }
6169 }
6170
VisitArrayLength(HArrayLength * instruction)6171 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6172 if (instruction->IsEmittedAtUseSite()) {
6173 return;
6174 }
6175
6176 LocationSummary* locations = instruction->GetLocations();
6177 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6178 Register obj = locations->InAt(0).AsRegister<Register>();
6179 Register out = locations->Out().AsRegister<Register>();
6180 __ movl(out, Address(obj, offset));
6181 codegen_->MaybeRecordImplicitNullCheck(instruction);
6182 // Mask out most significant bit in case the array is String's array of char.
6183 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6184 __ shrl(out, Immediate(1));
6185 }
6186 }
6187
VisitBoundsCheck(HBoundsCheck * instruction)6188 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6189 RegisterSet caller_saves = RegisterSet::Empty();
6190 InvokeRuntimeCallingConvention calling_convention;
6191 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6192 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6193 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6194 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6195 HInstruction* length = instruction->InputAt(1);
6196 if (!length->IsEmittedAtUseSite()) {
6197 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6198 }
6199 // Need register to see array's length.
6200 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6201 locations->AddTemp(Location::RequiresRegister());
6202 }
6203 }
6204
VisitBoundsCheck(HBoundsCheck * instruction)6205 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6206 const bool is_string_compressed_char_at =
6207 mirror::kUseStringCompression && instruction->IsStringCharAt();
6208 LocationSummary* locations = instruction->GetLocations();
6209 Location index_loc = locations->InAt(0);
6210 Location length_loc = locations->InAt(1);
6211 SlowPathCode* slow_path =
6212 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6213
6214 if (length_loc.IsConstant()) {
6215 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6216 if (index_loc.IsConstant()) {
6217 // BCE will remove the bounds check if we are guarenteed to pass.
6218 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6219 if (index < 0 || index >= length) {
6220 codegen_->AddSlowPath(slow_path);
6221 __ jmp(slow_path->GetEntryLabel());
6222 } else {
6223 // Some optimization after BCE may have generated this, and we should not
6224 // generate a bounds check if it is a valid range.
6225 }
6226 return;
6227 }
6228
6229 // We have to reverse the jump condition because the length is the constant.
6230 Register index_reg = index_loc.AsRegister<Register>();
6231 __ cmpl(index_reg, Immediate(length));
6232 codegen_->AddSlowPath(slow_path);
6233 __ j(kAboveEqual, slow_path->GetEntryLabel());
6234 } else {
6235 HInstruction* array_length = instruction->InputAt(1);
6236 if (array_length->IsEmittedAtUseSite()) {
6237 // Address the length field in the array.
6238 DCHECK(array_length->IsArrayLength());
6239 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6240 Location array_loc = array_length->GetLocations()->InAt(0);
6241 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6242 if (is_string_compressed_char_at) {
6243 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6244 // the string compression flag) with the in-memory length and avoid the temporary.
6245 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6246 __ movl(length_reg, array_len);
6247 codegen_->MaybeRecordImplicitNullCheck(array_length);
6248 __ shrl(length_reg, Immediate(1));
6249 codegen_->GenerateIntCompare(length_reg, index_loc);
6250 } else {
6251 // Checking bounds for general case:
6252 // Array of char or string's array with feature compression off.
6253 if (index_loc.IsConstant()) {
6254 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6255 __ cmpl(array_len, Immediate(value));
6256 } else {
6257 __ cmpl(array_len, index_loc.AsRegister<Register>());
6258 }
6259 codegen_->MaybeRecordImplicitNullCheck(array_length);
6260 }
6261 } else {
6262 codegen_->GenerateIntCompare(length_loc, index_loc);
6263 }
6264 codegen_->AddSlowPath(slow_path);
6265 __ j(kBelowEqual, slow_path->GetEntryLabel());
6266 }
6267 }
6268
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)6269 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6270 LOG(FATAL) << "Unreachable";
6271 }
6272
VisitParallelMove(HParallelMove * instruction)6273 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6274 if (instruction->GetNext()->IsSuspendCheck() &&
6275 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6276 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6277 // The back edge will generate the suspend check.
6278 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6279 }
6280
6281 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6282 }
6283
VisitSuspendCheck(HSuspendCheck * instruction)6284 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6285 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6286 instruction, LocationSummary::kCallOnSlowPath);
6287 // In suspend check slow path, usually there are no caller-save registers at all.
6288 // If SIMD instructions are present, however, we force spilling all live SIMD
6289 // registers in full width (since the runtime only saves/restores lower part).
6290 locations->SetCustomSlowPathCallerSaves(
6291 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6292 }
6293
VisitSuspendCheck(HSuspendCheck * instruction)6294 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6295 HBasicBlock* block = instruction->GetBlock();
6296 if (block->GetLoopInformation() != nullptr) {
6297 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6298 // The back edge will generate the suspend check.
6299 return;
6300 }
6301 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6302 // The goto will generate the suspend check.
6303 return;
6304 }
6305 GenerateSuspendCheck(instruction, nullptr);
6306 }
6307
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)6308 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6309 HBasicBlock* successor) {
6310 SuspendCheckSlowPathX86* slow_path =
6311 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6312 if (slow_path == nullptr) {
6313 slow_path =
6314 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6315 instruction->SetSlowPath(slow_path);
6316 codegen_->AddSlowPath(slow_path);
6317 if (successor != nullptr) {
6318 DCHECK(successor->IsLoopHeader());
6319 }
6320 } else {
6321 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6322 }
6323
6324 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6325 Immediate(0));
6326 if (successor == nullptr) {
6327 __ j(kNotEqual, slow_path->GetEntryLabel());
6328 __ Bind(slow_path->GetReturnLabel());
6329 } else {
6330 __ j(kEqual, codegen_->GetLabelOf(successor));
6331 __ jmp(slow_path->GetEntryLabel());
6332 }
6333 }
6334
GetAssembler() const6335 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6336 return codegen_->GetAssembler();
6337 }
6338
MoveMemoryToMemory(int dst,int src,int number_of_words)6339 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6340 ScratchRegisterScope ensure_scratch(
6341 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6342 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6343 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6344
6345 // Now that temp register is available (possibly spilled), move blocks of memory.
6346 for (int i = 0; i < number_of_words; i++) {
6347 __ movl(temp_reg, Address(ESP, src + stack_offset));
6348 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6349 stack_offset += kX86WordSize;
6350 }
6351 }
6352
EmitMove(size_t index)6353 void ParallelMoveResolverX86::EmitMove(size_t index) {
6354 MoveOperands* move = moves_[index];
6355 Location source = move->GetSource();
6356 Location destination = move->GetDestination();
6357
6358 if (source.IsRegister()) {
6359 if (destination.IsRegister()) {
6360 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6361 } else if (destination.IsFpuRegister()) {
6362 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6363 } else {
6364 DCHECK(destination.IsStackSlot());
6365 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6366 }
6367 } else if (source.IsRegisterPair()) {
6368 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6369 // Create stack space for 2 elements.
6370 __ subl(ESP, Immediate(2 * elem_size));
6371 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
6372 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
6373 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6374 // And remove the temporary stack space we allocated.
6375 __ addl(ESP, Immediate(2 * elem_size));
6376 } else if (source.IsFpuRegister()) {
6377 if (destination.IsRegister()) {
6378 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6379 } else if (destination.IsFpuRegister()) {
6380 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6381 } else if (destination.IsRegisterPair()) {
6382 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
6383 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
6384 __ psrlq(src_reg, Immediate(32));
6385 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
6386 } else if (destination.IsStackSlot()) {
6387 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6388 } else if (destination.IsDoubleStackSlot()) {
6389 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6390 } else {
6391 DCHECK(destination.IsSIMDStackSlot());
6392 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6393 }
6394 } else if (source.IsStackSlot()) {
6395 if (destination.IsRegister()) {
6396 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6397 } else if (destination.IsFpuRegister()) {
6398 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6399 } else {
6400 DCHECK(destination.IsStackSlot());
6401 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6402 }
6403 } else if (source.IsDoubleStackSlot()) {
6404 if (destination.IsRegisterPair()) {
6405 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6406 __ movl(destination.AsRegisterPairHigh<Register>(),
6407 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6408 } else if (destination.IsFpuRegister()) {
6409 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6410 } else {
6411 DCHECK(destination.IsDoubleStackSlot()) << destination;
6412 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6413 }
6414 } else if (source.IsSIMDStackSlot()) {
6415 if (destination.IsFpuRegister()) {
6416 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6417 } else {
6418 DCHECK(destination.IsSIMDStackSlot());
6419 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6420 }
6421 } else if (source.IsConstant()) {
6422 HConstant* constant = source.GetConstant();
6423 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6424 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6425 if (destination.IsRegister()) {
6426 if (value == 0) {
6427 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6428 } else {
6429 __ movl(destination.AsRegister<Register>(), Immediate(value));
6430 }
6431 } else {
6432 DCHECK(destination.IsStackSlot()) << destination;
6433 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6434 }
6435 } else if (constant->IsFloatConstant()) {
6436 float fp_value = constant->AsFloatConstant()->GetValue();
6437 int32_t value = bit_cast<int32_t, float>(fp_value);
6438 Immediate imm(value);
6439 if (destination.IsFpuRegister()) {
6440 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6441 if (value == 0) {
6442 // Easy handling of 0.0.
6443 __ xorps(dest, dest);
6444 } else {
6445 ScratchRegisterScope ensure_scratch(
6446 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6447 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6448 __ movl(temp, Immediate(value));
6449 __ movd(dest, temp);
6450 }
6451 } else {
6452 DCHECK(destination.IsStackSlot()) << destination;
6453 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6454 }
6455 } else if (constant->IsLongConstant()) {
6456 int64_t value = constant->AsLongConstant()->GetValue();
6457 int32_t low_value = Low32Bits(value);
6458 int32_t high_value = High32Bits(value);
6459 Immediate low(low_value);
6460 Immediate high(high_value);
6461 if (destination.IsDoubleStackSlot()) {
6462 __ movl(Address(ESP, destination.GetStackIndex()), low);
6463 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6464 } else {
6465 __ movl(destination.AsRegisterPairLow<Register>(), low);
6466 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6467 }
6468 } else {
6469 DCHECK(constant->IsDoubleConstant());
6470 double dbl_value = constant->AsDoubleConstant()->GetValue();
6471 int64_t value = bit_cast<int64_t, double>(dbl_value);
6472 int32_t low_value = Low32Bits(value);
6473 int32_t high_value = High32Bits(value);
6474 Immediate low(low_value);
6475 Immediate high(high_value);
6476 if (destination.IsFpuRegister()) {
6477 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6478 if (value == 0) {
6479 // Easy handling of 0.0.
6480 __ xorpd(dest, dest);
6481 } else {
6482 __ pushl(high);
6483 __ pushl(low);
6484 __ movsd(dest, Address(ESP, 0));
6485 __ addl(ESP, Immediate(8));
6486 }
6487 } else {
6488 DCHECK(destination.IsDoubleStackSlot()) << destination;
6489 __ movl(Address(ESP, destination.GetStackIndex()), low);
6490 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6491 }
6492 }
6493 } else {
6494 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6495 }
6496 }
6497
Exchange(Register reg,int mem)6498 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6499 Register suggested_scratch = reg == EAX ? EBX : EAX;
6500 ScratchRegisterScope ensure_scratch(
6501 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6502
6503 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6504 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6505 __ movl(Address(ESP, mem + stack_offset), reg);
6506 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6507 }
6508
Exchange32(XmmRegister reg,int mem)6509 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6510 ScratchRegisterScope ensure_scratch(
6511 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6512
6513 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6514 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6515 __ movl(temp_reg, Address(ESP, mem + stack_offset));
6516 __ movss(Address(ESP, mem + stack_offset), reg);
6517 __ movd(reg, temp_reg);
6518 }
6519
Exchange128(XmmRegister reg,int mem)6520 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6521 size_t extra_slot = 4 * kX86WordSize;
6522 __ subl(ESP, Immediate(extra_slot));
6523 __ movups(Address(ESP, 0), XmmRegister(reg));
6524 ExchangeMemory(0, mem + extra_slot, 4);
6525 __ movups(XmmRegister(reg), Address(ESP, 0));
6526 __ addl(ESP, Immediate(extra_slot));
6527 }
6528
ExchangeMemory(int mem1,int mem2,int number_of_words)6529 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6530 ScratchRegisterScope ensure_scratch1(
6531 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6532
6533 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6534 ScratchRegisterScope ensure_scratch2(
6535 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6536
6537 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6538 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6539
6540 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6541 for (int i = 0; i < number_of_words; i++) {
6542 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6543 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6544 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6545 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6546 stack_offset += kX86WordSize;
6547 }
6548 }
6549
EmitSwap(size_t index)6550 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6551 MoveOperands* move = moves_[index];
6552 Location source = move->GetSource();
6553 Location destination = move->GetDestination();
6554
6555 if (source.IsRegister() && destination.IsRegister()) {
6556 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
6557 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6558 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6559 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6560 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6561 } else if (source.IsRegister() && destination.IsStackSlot()) {
6562 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6563 } else if (source.IsStackSlot() && destination.IsRegister()) {
6564 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6565 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6566 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6567 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6568 // Use XOR Swap algorithm to avoid a temporary.
6569 DCHECK_NE(source.reg(), destination.reg());
6570 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6571 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6572 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6573 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6574 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6575 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6576 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6577 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6578 // Take advantage of the 16 bytes in the XMM register.
6579 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6580 Address stack(ESP, destination.GetStackIndex());
6581 // Load the double into the high doubleword.
6582 __ movhpd(reg, stack);
6583
6584 // Store the low double into the destination.
6585 __ movsd(stack, reg);
6586
6587 // Move the high double to the low double.
6588 __ psrldq(reg, Immediate(8));
6589 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6590 // Take advantage of the 16 bytes in the XMM register.
6591 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6592 Address stack(ESP, source.GetStackIndex());
6593 // Load the double into the high doubleword.
6594 __ movhpd(reg, stack);
6595
6596 // Store the low double into the destination.
6597 __ movsd(stack, reg);
6598
6599 // Move the high double to the low double.
6600 __ psrldq(reg, Immediate(8));
6601 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6602 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6603 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6604 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6605 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6606 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6607 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6608 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6609 } else {
6610 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6611 }
6612 }
6613
SpillScratch(int reg)6614 void ParallelMoveResolverX86::SpillScratch(int reg) {
6615 __ pushl(static_cast<Register>(reg));
6616 }
6617
RestoreScratch(int reg)6618 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6619 __ popl(static_cast<Register>(reg));
6620 }
6621
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6622 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6623 HLoadClass::LoadKind desired_class_load_kind) {
6624 switch (desired_class_load_kind) {
6625 case HLoadClass::LoadKind::kInvalid:
6626 LOG(FATAL) << "UNREACHABLE";
6627 UNREACHABLE();
6628 case HLoadClass::LoadKind::kReferrersClass:
6629 break;
6630 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6631 case HLoadClass::LoadKind::kBootImageRelRo:
6632 case HLoadClass::LoadKind::kBssEntry:
6633 DCHECK(!Runtime::Current()->UseJitCompilation());
6634 break;
6635 case HLoadClass::LoadKind::kJitBootImageAddress:
6636 case HLoadClass::LoadKind::kJitTableAddress:
6637 DCHECK(Runtime::Current()->UseJitCompilation());
6638 break;
6639 case HLoadClass::LoadKind::kRuntimeCall:
6640 break;
6641 }
6642 return desired_class_load_kind;
6643 }
6644
VisitLoadClass(HLoadClass * cls)6645 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6646 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6647 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6648 InvokeRuntimeCallingConvention calling_convention;
6649 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6650 cls,
6651 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6652 Location::RegisterLocation(EAX));
6653 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6654 return;
6655 }
6656 DCHECK(!cls->NeedsAccessCheck());
6657
6658 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6659 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6660 ? LocationSummary::kCallOnSlowPath
6661 : LocationSummary::kNoCall;
6662 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6663 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6664 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6665 }
6666
6667 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6668 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6669 load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
6670 load_kind == HLoadClass::LoadKind::kBssEntry) {
6671 locations->SetInAt(0, Location::RequiresRegister());
6672 }
6673 locations->SetOut(Location::RequiresRegister());
6674 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6675 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6676 // Rely on the type resolution and/or initialization to save everything.
6677 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6678 } else {
6679 // For non-Baker read barrier we have a temp-clobbering call.
6680 }
6681 }
6682 }
6683
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)6684 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6685 dex::TypeIndex type_index,
6686 Handle<mirror::Class> handle) {
6687 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6688 // Add a patch entry and return the label.
6689 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6690 PatchInfo<Label>* info = &jit_class_patches_.back();
6691 return &info->label;
6692 }
6693
6694 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6695 // move.
VisitLoadClass(HLoadClass * cls)6696 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6697 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6698 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6699 codegen_->GenerateLoadClassRuntimeCall(cls);
6700 return;
6701 }
6702 DCHECK(!cls->NeedsAccessCheck());
6703
6704 LocationSummary* locations = cls->GetLocations();
6705 Location out_loc = locations->Out();
6706 Register out = out_loc.AsRegister<Register>();
6707
6708 bool generate_null_check = false;
6709 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6710 ? kWithoutReadBarrier
6711 : kCompilerReadBarrierOption;
6712 switch (load_kind) {
6713 case HLoadClass::LoadKind::kReferrersClass: {
6714 DCHECK(!cls->CanCallRuntime());
6715 DCHECK(!cls->MustGenerateClinitCheck());
6716 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6717 Register current_method = locations->InAt(0).AsRegister<Register>();
6718 GenerateGcRootFieldLoad(
6719 cls,
6720 out_loc,
6721 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6722 /* fixup_label= */ nullptr,
6723 read_barrier_option);
6724 break;
6725 }
6726 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6727 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6728 codegen_->GetCompilerOptions().IsBootImageExtension());
6729 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6730 Register method_address = locations->InAt(0).AsRegister<Register>();
6731 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6732 codegen_->RecordBootImageTypePatch(cls);
6733 break;
6734 }
6735 case HLoadClass::LoadKind::kBootImageRelRo: {
6736 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6737 Register method_address = locations->InAt(0).AsRegister<Register>();
6738 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6739 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6740 codegen_->GetBootImageOffset(cls));
6741 break;
6742 }
6743 case HLoadClass::LoadKind::kBssEntry: {
6744 Register method_address = locations->InAt(0).AsRegister<Register>();
6745 Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6746 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6747 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6748 // No need for memory fence, thanks to the x86 memory model.
6749 generate_null_check = true;
6750 break;
6751 }
6752 case HLoadClass::LoadKind::kJitBootImageAddress: {
6753 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6754 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6755 DCHECK_NE(address, 0u);
6756 __ movl(out, Immediate(address));
6757 break;
6758 }
6759 case HLoadClass::LoadKind::kJitTableAddress: {
6760 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6761 Label* fixup_label = codegen_->NewJitRootClassPatch(
6762 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6763 // /* GcRoot<mirror::Class> */ out = *address
6764 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6765 break;
6766 }
6767 case HLoadClass::LoadKind::kRuntimeCall:
6768 case HLoadClass::LoadKind::kInvalid:
6769 LOG(FATAL) << "UNREACHABLE";
6770 UNREACHABLE();
6771 }
6772
6773 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6774 DCHECK(cls->CanCallRuntime());
6775 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
6776 codegen_->AddSlowPath(slow_path);
6777
6778 if (generate_null_check) {
6779 __ testl(out, out);
6780 __ j(kEqual, slow_path->GetEntryLabel());
6781 }
6782
6783 if (cls->MustGenerateClinitCheck()) {
6784 GenerateClassInitializationCheck(slow_path, out);
6785 } else {
6786 __ Bind(slow_path->GetExitLabel());
6787 }
6788 }
6789 }
6790
VisitLoadMethodHandle(HLoadMethodHandle * load)6791 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6792 InvokeRuntimeCallingConvention calling_convention;
6793 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6794 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6795 }
6796
VisitLoadMethodHandle(HLoadMethodHandle * load)6797 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6798 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6799 }
6800
VisitLoadMethodType(HLoadMethodType * load)6801 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
6802 InvokeRuntimeCallingConvention calling_convention;
6803 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6804 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6805 }
6806
VisitLoadMethodType(HLoadMethodType * load)6807 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
6808 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6809 }
6810
VisitClinitCheck(HClinitCheck * check)6811 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6812 LocationSummary* locations =
6813 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6814 locations->SetInAt(0, Location::RequiresRegister());
6815 if (check->HasUses()) {
6816 locations->SetOut(Location::SameAsFirstInput());
6817 }
6818 // Rely on the type initialization to save everything we need.
6819 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6820 }
6821
VisitClinitCheck(HClinitCheck * check)6822 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6823 // We assume the class to not be null.
6824 SlowPathCode* slow_path =
6825 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
6826 codegen_->AddSlowPath(slow_path);
6827 GenerateClassInitializationCheck(slow_path,
6828 check->GetLocations()->InAt(0).AsRegister<Register>());
6829 }
6830
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6831 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6832 SlowPathCode* slow_path, Register class_reg) {
6833 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6834 const size_t status_byte_offset =
6835 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6836 constexpr uint32_t shifted_visibly_initialized_value =
6837 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6838
6839 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6840 __ j(kBelow, slow_path->GetEntryLabel());
6841 __ Bind(slow_path->GetExitLabel());
6842 }
6843
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,Register temp)6844 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6845 Register temp) {
6846 uint32_t path_to_root = check->GetBitstringPathToRoot();
6847 uint32_t mask = check->GetBitstringMask();
6848 DCHECK(IsPowerOfTwo(mask + 1));
6849 size_t mask_bits = WhichPowerOf2(mask + 1);
6850
6851 if (mask_bits == 16u) {
6852 // Compare the bitstring in memory.
6853 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6854 } else {
6855 // /* uint32_t */ temp = temp->status_
6856 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6857 // Compare the bitstring bits using SUB.
6858 __ subl(temp, Immediate(path_to_root));
6859 // Shift out bits that do not contribute to the comparison.
6860 __ shll(temp, Immediate(32u - mask_bits));
6861 }
6862 }
6863
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6864 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6865 HLoadString::LoadKind desired_string_load_kind) {
6866 switch (desired_string_load_kind) {
6867 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6868 case HLoadString::LoadKind::kBootImageRelRo:
6869 case HLoadString::LoadKind::kBssEntry:
6870 DCHECK(!Runtime::Current()->UseJitCompilation());
6871 break;
6872 case HLoadString::LoadKind::kJitBootImageAddress:
6873 case HLoadString::LoadKind::kJitTableAddress:
6874 DCHECK(Runtime::Current()->UseJitCompilation());
6875 break;
6876 case HLoadString::LoadKind::kRuntimeCall:
6877 break;
6878 }
6879 return desired_string_load_kind;
6880 }
6881
VisitLoadString(HLoadString * load)6882 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6883 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6884 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6885 HLoadString::LoadKind load_kind = load->GetLoadKind();
6886 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6887 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
6888 load_kind == HLoadString::LoadKind::kBssEntry) {
6889 locations->SetInAt(0, Location::RequiresRegister());
6890 }
6891 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
6892 locations->SetOut(Location::RegisterLocation(EAX));
6893 } else {
6894 locations->SetOut(Location::RequiresRegister());
6895 if (load_kind == HLoadString::LoadKind::kBssEntry) {
6896 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6897 // Rely on the pResolveString to save everything.
6898 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6899 } else {
6900 // For non-Baker read barrier we have a temp-clobbering call.
6901 }
6902 }
6903 }
6904 }
6905
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)6906 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6907 dex::StringIndex string_index,
6908 Handle<mirror::String> handle) {
6909 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6910 // Add a patch entry and return the label.
6911 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6912 PatchInfo<Label>* info = &jit_string_patches_.back();
6913 return &info->label;
6914 }
6915
6916 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6917 // move.
VisitLoadString(HLoadString * load)6918 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6919 LocationSummary* locations = load->GetLocations();
6920 Location out_loc = locations->Out();
6921 Register out = out_loc.AsRegister<Register>();
6922
6923 switch (load->GetLoadKind()) {
6924 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6925 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6926 codegen_->GetCompilerOptions().IsBootImageExtension());
6927 Register method_address = locations->InAt(0).AsRegister<Register>();
6928 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6929 codegen_->RecordBootImageStringPatch(load);
6930 return;
6931 }
6932 case HLoadString::LoadKind::kBootImageRelRo: {
6933 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6934 Register method_address = locations->InAt(0).AsRegister<Register>();
6935 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6936 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6937 codegen_->GetBootImageOffset(load));
6938 return;
6939 }
6940 case HLoadString::LoadKind::kBssEntry: {
6941 Register method_address = locations->InAt(0).AsRegister<Register>();
6942 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6943 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6944 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6945 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6946 // No need for memory fence, thanks to the x86 memory model.
6947 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
6948 codegen_->AddSlowPath(slow_path);
6949 __ testl(out, out);
6950 __ j(kEqual, slow_path->GetEntryLabel());
6951 __ Bind(slow_path->GetExitLabel());
6952 return;
6953 }
6954 case HLoadString::LoadKind::kJitBootImageAddress: {
6955 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6956 DCHECK_NE(address, 0u);
6957 __ movl(out, Immediate(address));
6958 return;
6959 }
6960 case HLoadString::LoadKind::kJitTableAddress: {
6961 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6962 Label* fixup_label = codegen_->NewJitRootStringPatch(
6963 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6964 // /* GcRoot<mirror::String> */ out = *address
6965 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6966 return;
6967 }
6968 default:
6969 break;
6970 }
6971
6972 // TODO: Re-add the compiler code to do string dex cache lookup again.
6973 InvokeRuntimeCallingConvention calling_convention;
6974 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6975 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6976 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6977 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6978 }
6979
GetExceptionTlsAddress()6980 static Address GetExceptionTlsAddress() {
6981 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6982 }
6983
VisitLoadException(HLoadException * load)6984 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6985 LocationSummary* locations =
6986 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6987 locations->SetOut(Location::RequiresRegister());
6988 }
6989
VisitLoadException(HLoadException * load)6990 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6991 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6992 }
6993
VisitClearException(HClearException * clear)6994 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6995 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6996 }
6997
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)6998 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6999 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7000 }
7001
VisitThrow(HThrow * instruction)7002 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7003 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7004 instruction, LocationSummary::kCallOnMainOnly);
7005 InvokeRuntimeCallingConvention calling_convention;
7006 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7007 }
7008
VisitThrow(HThrow * instruction)7009 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7010 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7011 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7012 }
7013
7014 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)7015 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7016 if (kEmitCompilerReadBarrier &&
7017 !kUseBakerReadBarrier &&
7018 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7019 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7020 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7021 return 1;
7022 }
7023 return 0;
7024 }
7025
7026 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7027 // interface pointer, the current interface is compared in memory.
7028 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)7029 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7030 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7031 return 2;
7032 }
7033 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7034 }
7035
VisitInstanceOf(HInstanceOf * instruction)7036 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7037 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7038 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7039 bool baker_read_barrier_slow_path = false;
7040 switch (type_check_kind) {
7041 case TypeCheckKind::kExactCheck:
7042 case TypeCheckKind::kAbstractClassCheck:
7043 case TypeCheckKind::kClassHierarchyCheck:
7044 case TypeCheckKind::kArrayObjectCheck: {
7045 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7046 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7047 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7048 break;
7049 }
7050 case TypeCheckKind::kArrayCheck:
7051 case TypeCheckKind::kUnresolvedCheck:
7052 case TypeCheckKind::kInterfaceCheck:
7053 call_kind = LocationSummary::kCallOnSlowPath;
7054 break;
7055 case TypeCheckKind::kBitstringCheck:
7056 break;
7057 }
7058
7059 LocationSummary* locations =
7060 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7061 if (baker_read_barrier_slow_path) {
7062 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7063 }
7064 locations->SetInAt(0, Location::RequiresRegister());
7065 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7066 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7067 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7068 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7069 } else {
7070 locations->SetInAt(1, Location::Any());
7071 }
7072 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7073 locations->SetOut(Location::RequiresRegister());
7074 // When read barriers are enabled, we need a temporary register for some cases.
7075 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7076 }
7077
VisitInstanceOf(HInstanceOf * instruction)7078 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7079 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7080 LocationSummary* locations = instruction->GetLocations();
7081 Location obj_loc = locations->InAt(0);
7082 Register obj = obj_loc.AsRegister<Register>();
7083 Location cls = locations->InAt(1);
7084 Location out_loc = locations->Out();
7085 Register out = out_loc.AsRegister<Register>();
7086 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7087 DCHECK_LE(num_temps, 1u);
7088 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7089 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7090 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7091 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7092 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7093 SlowPathCode* slow_path = nullptr;
7094 NearLabel done, zero;
7095
7096 // Return 0 if `obj` is null.
7097 // Avoid null check if we know obj is not null.
7098 if (instruction->MustDoNullCheck()) {
7099 __ testl(obj, obj);
7100 __ j(kEqual, &zero);
7101 }
7102
7103 switch (type_check_kind) {
7104 case TypeCheckKind::kExactCheck: {
7105 ReadBarrierOption read_barrier_option =
7106 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7107 // /* HeapReference<Class> */ out = obj->klass_
7108 GenerateReferenceLoadTwoRegisters(instruction,
7109 out_loc,
7110 obj_loc,
7111 class_offset,
7112 read_barrier_option);
7113 if (cls.IsRegister()) {
7114 __ cmpl(out, cls.AsRegister<Register>());
7115 } else {
7116 DCHECK(cls.IsStackSlot()) << cls;
7117 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7118 }
7119
7120 // Classes must be equal for the instanceof to succeed.
7121 __ j(kNotEqual, &zero);
7122 __ movl(out, Immediate(1));
7123 __ jmp(&done);
7124 break;
7125 }
7126
7127 case TypeCheckKind::kAbstractClassCheck: {
7128 ReadBarrierOption read_barrier_option =
7129 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7130 // /* HeapReference<Class> */ out = obj->klass_
7131 GenerateReferenceLoadTwoRegisters(instruction,
7132 out_loc,
7133 obj_loc,
7134 class_offset,
7135 read_barrier_option);
7136 // If the class is abstract, we eagerly fetch the super class of the
7137 // object to avoid doing a comparison we know will fail.
7138 NearLabel loop;
7139 __ Bind(&loop);
7140 // /* HeapReference<Class> */ out = out->super_class_
7141 GenerateReferenceLoadOneRegister(instruction,
7142 out_loc,
7143 super_offset,
7144 maybe_temp_loc,
7145 read_barrier_option);
7146 __ testl(out, out);
7147 // If `out` is null, we use it for the result, and jump to `done`.
7148 __ j(kEqual, &done);
7149 if (cls.IsRegister()) {
7150 __ cmpl(out, cls.AsRegister<Register>());
7151 } else {
7152 DCHECK(cls.IsStackSlot()) << cls;
7153 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7154 }
7155 __ j(kNotEqual, &loop);
7156 __ movl(out, Immediate(1));
7157 if (zero.IsLinked()) {
7158 __ jmp(&done);
7159 }
7160 break;
7161 }
7162
7163 case TypeCheckKind::kClassHierarchyCheck: {
7164 ReadBarrierOption read_barrier_option =
7165 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7166 // /* HeapReference<Class> */ out = obj->klass_
7167 GenerateReferenceLoadTwoRegisters(instruction,
7168 out_loc,
7169 obj_loc,
7170 class_offset,
7171 read_barrier_option);
7172 // Walk over the class hierarchy to find a match.
7173 NearLabel loop, success;
7174 __ Bind(&loop);
7175 if (cls.IsRegister()) {
7176 __ cmpl(out, cls.AsRegister<Register>());
7177 } else {
7178 DCHECK(cls.IsStackSlot()) << cls;
7179 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7180 }
7181 __ j(kEqual, &success);
7182 // /* HeapReference<Class> */ out = out->super_class_
7183 GenerateReferenceLoadOneRegister(instruction,
7184 out_loc,
7185 super_offset,
7186 maybe_temp_loc,
7187 read_barrier_option);
7188 __ testl(out, out);
7189 __ j(kNotEqual, &loop);
7190 // If `out` is null, we use it for the result, and jump to `done`.
7191 __ jmp(&done);
7192 __ Bind(&success);
7193 __ movl(out, Immediate(1));
7194 if (zero.IsLinked()) {
7195 __ jmp(&done);
7196 }
7197 break;
7198 }
7199
7200 case TypeCheckKind::kArrayObjectCheck: {
7201 ReadBarrierOption read_barrier_option =
7202 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7203 // /* HeapReference<Class> */ out = obj->klass_
7204 GenerateReferenceLoadTwoRegisters(instruction,
7205 out_loc,
7206 obj_loc,
7207 class_offset,
7208 read_barrier_option);
7209 // Do an exact check.
7210 NearLabel exact_check;
7211 if (cls.IsRegister()) {
7212 __ cmpl(out, cls.AsRegister<Register>());
7213 } else {
7214 DCHECK(cls.IsStackSlot()) << cls;
7215 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7216 }
7217 __ j(kEqual, &exact_check);
7218 // Otherwise, we need to check that the object's class is a non-primitive array.
7219 // /* HeapReference<Class> */ out = out->component_type_
7220 GenerateReferenceLoadOneRegister(instruction,
7221 out_loc,
7222 component_offset,
7223 maybe_temp_loc,
7224 read_barrier_option);
7225 __ testl(out, out);
7226 // If `out` is null, we use it for the result, and jump to `done`.
7227 __ j(kEqual, &done);
7228 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7229 __ j(kNotEqual, &zero);
7230 __ Bind(&exact_check);
7231 __ movl(out, Immediate(1));
7232 __ jmp(&done);
7233 break;
7234 }
7235
7236 case TypeCheckKind::kArrayCheck: {
7237 // No read barrier since the slow path will retry upon failure.
7238 // /* HeapReference<Class> */ out = obj->klass_
7239 GenerateReferenceLoadTwoRegisters(instruction,
7240 out_loc,
7241 obj_loc,
7242 class_offset,
7243 kWithoutReadBarrier);
7244 if (cls.IsRegister()) {
7245 __ cmpl(out, cls.AsRegister<Register>());
7246 } else {
7247 DCHECK(cls.IsStackSlot()) << cls;
7248 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7249 }
7250 DCHECK(locations->OnlyCallsOnSlowPath());
7251 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7252 instruction, /* is_fatal= */ false);
7253 codegen_->AddSlowPath(slow_path);
7254 __ j(kNotEqual, slow_path->GetEntryLabel());
7255 __ movl(out, Immediate(1));
7256 if (zero.IsLinked()) {
7257 __ jmp(&done);
7258 }
7259 break;
7260 }
7261
7262 case TypeCheckKind::kUnresolvedCheck:
7263 case TypeCheckKind::kInterfaceCheck: {
7264 // Note that we indeed only call on slow path, but we always go
7265 // into the slow path for the unresolved and interface check
7266 // cases.
7267 //
7268 // We cannot directly call the InstanceofNonTrivial runtime
7269 // entry point without resorting to a type checking slow path
7270 // here (i.e. by calling InvokeRuntime directly), as it would
7271 // require to assign fixed registers for the inputs of this
7272 // HInstanceOf instruction (following the runtime calling
7273 // convention), which might be cluttered by the potential first
7274 // read barrier emission at the beginning of this method.
7275 //
7276 // TODO: Introduce a new runtime entry point taking the object
7277 // to test (instead of its class) as argument, and let it deal
7278 // with the read barrier issues. This will let us refactor this
7279 // case of the `switch` code as it was previously (with a direct
7280 // call to the runtime not using a type checking slow path).
7281 // This should also be beneficial for the other cases above.
7282 DCHECK(locations->OnlyCallsOnSlowPath());
7283 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7284 instruction, /* is_fatal= */ false);
7285 codegen_->AddSlowPath(slow_path);
7286 __ jmp(slow_path->GetEntryLabel());
7287 if (zero.IsLinked()) {
7288 __ jmp(&done);
7289 }
7290 break;
7291 }
7292
7293 case TypeCheckKind::kBitstringCheck: {
7294 // /* HeapReference<Class> */ temp = obj->klass_
7295 GenerateReferenceLoadTwoRegisters(instruction,
7296 out_loc,
7297 obj_loc,
7298 class_offset,
7299 kWithoutReadBarrier);
7300
7301 GenerateBitstringTypeCheckCompare(instruction, out);
7302 __ j(kNotEqual, &zero);
7303 __ movl(out, Immediate(1));
7304 __ jmp(&done);
7305 break;
7306 }
7307 }
7308
7309 if (zero.IsLinked()) {
7310 __ Bind(&zero);
7311 __ xorl(out, out);
7312 }
7313
7314 if (done.IsLinked()) {
7315 __ Bind(&done);
7316 }
7317
7318 if (slow_path != nullptr) {
7319 __ Bind(slow_path->GetExitLabel());
7320 }
7321 }
7322
VisitCheckCast(HCheckCast * instruction)7323 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7324 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7325 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7326 LocationSummary* locations =
7327 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7328 locations->SetInAt(0, Location::RequiresRegister());
7329 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7330 // Require a register for the interface check since there is a loop that compares the class to
7331 // a memory address.
7332 locations->SetInAt(1, Location::RequiresRegister());
7333 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7334 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7335 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7336 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7337 } else {
7338 locations->SetInAt(1, Location::Any());
7339 }
7340 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7341 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7342 }
7343
VisitCheckCast(HCheckCast * instruction)7344 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7345 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7346 LocationSummary* locations = instruction->GetLocations();
7347 Location obj_loc = locations->InAt(0);
7348 Register obj = obj_loc.AsRegister<Register>();
7349 Location cls = locations->InAt(1);
7350 Location temp_loc = locations->GetTemp(0);
7351 Register temp = temp_loc.AsRegister<Register>();
7352 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7353 DCHECK_GE(num_temps, 1u);
7354 DCHECK_LE(num_temps, 2u);
7355 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7356 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7357 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7358 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7359 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7360 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7361 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7362 const uint32_t object_array_data_offset =
7363 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7364
7365 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7366 SlowPathCode* type_check_slow_path =
7367 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7368 instruction, is_type_check_slow_path_fatal);
7369 codegen_->AddSlowPath(type_check_slow_path);
7370
7371 NearLabel done;
7372 // Avoid null check if we know obj is not null.
7373 if (instruction->MustDoNullCheck()) {
7374 __ testl(obj, obj);
7375 __ j(kEqual, &done);
7376 }
7377
7378 switch (type_check_kind) {
7379 case TypeCheckKind::kExactCheck:
7380 case TypeCheckKind::kArrayCheck: {
7381 // /* HeapReference<Class> */ temp = obj->klass_
7382 GenerateReferenceLoadTwoRegisters(instruction,
7383 temp_loc,
7384 obj_loc,
7385 class_offset,
7386 kWithoutReadBarrier);
7387
7388 if (cls.IsRegister()) {
7389 __ cmpl(temp, cls.AsRegister<Register>());
7390 } else {
7391 DCHECK(cls.IsStackSlot()) << cls;
7392 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7393 }
7394 // Jump to slow path for throwing the exception or doing a
7395 // more involved array check.
7396 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7397 break;
7398 }
7399
7400 case TypeCheckKind::kAbstractClassCheck: {
7401 // /* HeapReference<Class> */ temp = obj->klass_
7402 GenerateReferenceLoadTwoRegisters(instruction,
7403 temp_loc,
7404 obj_loc,
7405 class_offset,
7406 kWithoutReadBarrier);
7407
7408 // If the class is abstract, we eagerly fetch the super class of the
7409 // object to avoid doing a comparison we know will fail.
7410 NearLabel loop;
7411 __ Bind(&loop);
7412 // /* HeapReference<Class> */ temp = temp->super_class_
7413 GenerateReferenceLoadOneRegister(instruction,
7414 temp_loc,
7415 super_offset,
7416 maybe_temp2_loc,
7417 kWithoutReadBarrier);
7418
7419 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7420 // exception.
7421 __ testl(temp, temp);
7422 __ j(kZero, type_check_slow_path->GetEntryLabel());
7423
7424 // Otherwise, compare the classes
7425 if (cls.IsRegister()) {
7426 __ cmpl(temp, cls.AsRegister<Register>());
7427 } else {
7428 DCHECK(cls.IsStackSlot()) << cls;
7429 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7430 }
7431 __ j(kNotEqual, &loop);
7432 break;
7433 }
7434
7435 case TypeCheckKind::kClassHierarchyCheck: {
7436 // /* HeapReference<Class> */ temp = obj->klass_
7437 GenerateReferenceLoadTwoRegisters(instruction,
7438 temp_loc,
7439 obj_loc,
7440 class_offset,
7441 kWithoutReadBarrier);
7442
7443 // Walk over the class hierarchy to find a match.
7444 NearLabel loop;
7445 __ Bind(&loop);
7446 if (cls.IsRegister()) {
7447 __ cmpl(temp, cls.AsRegister<Register>());
7448 } else {
7449 DCHECK(cls.IsStackSlot()) << cls;
7450 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7451 }
7452 __ j(kEqual, &done);
7453
7454 // /* HeapReference<Class> */ temp = temp->super_class_
7455 GenerateReferenceLoadOneRegister(instruction,
7456 temp_loc,
7457 super_offset,
7458 maybe_temp2_loc,
7459 kWithoutReadBarrier);
7460
7461 // If the class reference currently in `temp` is not null, jump
7462 // back at the beginning of the loop.
7463 __ testl(temp, temp);
7464 __ j(kNotZero, &loop);
7465 // Otherwise, jump to the slow path to throw the exception.;
7466 __ jmp(type_check_slow_path->GetEntryLabel());
7467 break;
7468 }
7469
7470 case TypeCheckKind::kArrayObjectCheck: {
7471 // /* HeapReference<Class> */ temp = obj->klass_
7472 GenerateReferenceLoadTwoRegisters(instruction,
7473 temp_loc,
7474 obj_loc,
7475 class_offset,
7476 kWithoutReadBarrier);
7477
7478 // Do an exact check.
7479 if (cls.IsRegister()) {
7480 __ cmpl(temp, cls.AsRegister<Register>());
7481 } else {
7482 DCHECK(cls.IsStackSlot()) << cls;
7483 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7484 }
7485 __ j(kEqual, &done);
7486
7487 // Otherwise, we need to check that the object's class is a non-primitive array.
7488 // /* HeapReference<Class> */ temp = temp->component_type_
7489 GenerateReferenceLoadOneRegister(instruction,
7490 temp_loc,
7491 component_offset,
7492 maybe_temp2_loc,
7493 kWithoutReadBarrier);
7494
7495 // If the component type is null (i.e. the object not an array), jump to the slow path to
7496 // throw the exception. Otherwise proceed with the check.
7497 __ testl(temp, temp);
7498 __ j(kZero, type_check_slow_path->GetEntryLabel());
7499
7500 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7501 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7502 break;
7503 }
7504
7505 case TypeCheckKind::kUnresolvedCheck:
7506 // We always go into the type check slow path for the unresolved check case.
7507 // We cannot directly call the CheckCast runtime entry point
7508 // without resorting to a type checking slow path here (i.e. by
7509 // calling InvokeRuntime directly), as it would require to
7510 // assign fixed registers for the inputs of this HInstanceOf
7511 // instruction (following the runtime calling convention), which
7512 // might be cluttered by the potential first read barrier
7513 // emission at the beginning of this method.
7514 __ jmp(type_check_slow_path->GetEntryLabel());
7515 break;
7516
7517 case TypeCheckKind::kInterfaceCheck: {
7518 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7519 // We can not get false positives by doing this.
7520 // /* HeapReference<Class> */ temp = obj->klass_
7521 GenerateReferenceLoadTwoRegisters(instruction,
7522 temp_loc,
7523 obj_loc,
7524 class_offset,
7525 kWithoutReadBarrier);
7526
7527 // /* HeapReference<Class> */ temp = temp->iftable_
7528 GenerateReferenceLoadTwoRegisters(instruction,
7529 temp_loc,
7530 temp_loc,
7531 iftable_offset,
7532 kWithoutReadBarrier);
7533 // Iftable is never null.
7534 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7535 // Maybe poison the `cls` for direct comparison with memory.
7536 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7537 // Loop through the iftable and check if any class matches.
7538 NearLabel start_loop;
7539 __ Bind(&start_loop);
7540 // Need to subtract first to handle the empty array case.
7541 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7542 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7543 // Go to next interface if the classes do not match.
7544 __ cmpl(cls.AsRegister<Register>(),
7545 CodeGeneratorX86::ArrayAddress(temp,
7546 maybe_temp2_loc,
7547 TIMES_4,
7548 object_array_data_offset));
7549 __ j(kNotEqual, &start_loop);
7550 // If `cls` was poisoned above, unpoison it.
7551 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
7552 break;
7553 }
7554
7555 case TypeCheckKind::kBitstringCheck: {
7556 // /* HeapReference<Class> */ temp = obj->klass_
7557 GenerateReferenceLoadTwoRegisters(instruction,
7558 temp_loc,
7559 obj_loc,
7560 class_offset,
7561 kWithoutReadBarrier);
7562
7563 GenerateBitstringTypeCheckCompare(instruction, temp);
7564 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7565 break;
7566 }
7567 }
7568 __ Bind(&done);
7569
7570 __ Bind(type_check_slow_path->GetExitLabel());
7571 }
7572
VisitMonitorOperation(HMonitorOperation * instruction)7573 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7574 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7575 instruction, LocationSummary::kCallOnMainOnly);
7576 InvokeRuntimeCallingConvention calling_convention;
7577 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7578 }
7579
VisitMonitorOperation(HMonitorOperation * instruction)7580 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7581 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7582 : kQuickUnlockObject,
7583 instruction,
7584 instruction->GetDexPc());
7585 if (instruction->IsEnter()) {
7586 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7587 } else {
7588 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7589 }
7590 }
7591
VisitX86AndNot(HX86AndNot * instruction)7592 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7593 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7594 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7595 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7596 locations->SetInAt(0, Location::RequiresRegister());
7597 locations->SetInAt(1, Location::RequiresRegister());
7598 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7599 }
7600
VisitX86AndNot(HX86AndNot * instruction)7601 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
7602 LocationSummary* locations = instruction->GetLocations();
7603 Location first = locations->InAt(0);
7604 Location second = locations->InAt(1);
7605 Location dest = locations->Out();
7606 if (instruction->GetResultType() == DataType::Type::kInt32) {
7607 __ andn(dest.AsRegister<Register>(),
7608 first.AsRegister<Register>(),
7609 second.AsRegister<Register>());
7610 } else {
7611 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7612 __ andn(dest.AsRegisterPairLow<Register>(),
7613 first.AsRegisterPairLow<Register>(),
7614 second.AsRegisterPairLow<Register>());
7615 __ andn(dest.AsRegisterPairHigh<Register>(),
7616 first.AsRegisterPairHigh<Register>(),
7617 second.AsRegisterPairHigh<Register>());
7618 }
7619 }
7620
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7621 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7622 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7623 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
7624 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7625 locations->SetInAt(0, Location::RequiresRegister());
7626 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7627 }
7628
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7629 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
7630 HX86MaskOrResetLeastSetBit* instruction) {
7631 LocationSummary* locations = instruction->GetLocations();
7632 Location src = locations->InAt(0);
7633 Location dest = locations->Out();
7634 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
7635 switch (instruction->GetOpKind()) {
7636 case HInstruction::kAnd:
7637 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
7638 break;
7639 case HInstruction::kXor:
7640 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
7641 break;
7642 default:
7643 LOG(FATAL) << "Unreachable";
7644 }
7645 }
7646
VisitAnd(HAnd * instruction)7647 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)7648 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)7649 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7650
HandleBitwiseOperation(HBinaryOperation * instruction)7651 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7652 LocationSummary* locations =
7653 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7654 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7655 || instruction->GetResultType() == DataType::Type::kInt64);
7656 locations->SetInAt(0, Location::RequiresRegister());
7657 locations->SetInAt(1, Location::Any());
7658 locations->SetOut(Location::SameAsFirstInput());
7659 }
7660
VisitAnd(HAnd * instruction)7661 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
7662 HandleBitwiseOperation(instruction);
7663 }
7664
VisitOr(HOr * instruction)7665 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
7666 HandleBitwiseOperation(instruction);
7667 }
7668
VisitXor(HXor * instruction)7669 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
7670 HandleBitwiseOperation(instruction);
7671 }
7672
HandleBitwiseOperation(HBinaryOperation * instruction)7673 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7674 LocationSummary* locations = instruction->GetLocations();
7675 Location first = locations->InAt(0);
7676 Location second = locations->InAt(1);
7677 DCHECK(first.Equals(locations->Out()));
7678
7679 if (instruction->GetResultType() == DataType::Type::kInt32) {
7680 if (second.IsRegister()) {
7681 if (instruction->IsAnd()) {
7682 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
7683 } else if (instruction->IsOr()) {
7684 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
7685 } else {
7686 DCHECK(instruction->IsXor());
7687 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
7688 }
7689 } else if (second.IsConstant()) {
7690 if (instruction->IsAnd()) {
7691 __ andl(first.AsRegister<Register>(),
7692 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7693 } else if (instruction->IsOr()) {
7694 __ orl(first.AsRegister<Register>(),
7695 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7696 } else {
7697 DCHECK(instruction->IsXor());
7698 __ xorl(first.AsRegister<Register>(),
7699 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7700 }
7701 } else {
7702 if (instruction->IsAnd()) {
7703 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7704 } else if (instruction->IsOr()) {
7705 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7706 } else {
7707 DCHECK(instruction->IsXor());
7708 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7709 }
7710 }
7711 } else {
7712 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7713 if (second.IsRegisterPair()) {
7714 if (instruction->IsAnd()) {
7715 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7716 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7717 } else if (instruction->IsOr()) {
7718 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7719 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7720 } else {
7721 DCHECK(instruction->IsXor());
7722 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7723 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7724 }
7725 } else if (second.IsDoubleStackSlot()) {
7726 if (instruction->IsAnd()) {
7727 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7728 __ andl(first.AsRegisterPairHigh<Register>(),
7729 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7730 } else if (instruction->IsOr()) {
7731 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7732 __ orl(first.AsRegisterPairHigh<Register>(),
7733 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7734 } else {
7735 DCHECK(instruction->IsXor());
7736 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7737 __ xorl(first.AsRegisterPairHigh<Register>(),
7738 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7739 }
7740 } else {
7741 DCHECK(second.IsConstant()) << second;
7742 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7743 int32_t low_value = Low32Bits(value);
7744 int32_t high_value = High32Bits(value);
7745 Immediate low(low_value);
7746 Immediate high(high_value);
7747 Register first_low = first.AsRegisterPairLow<Register>();
7748 Register first_high = first.AsRegisterPairHigh<Register>();
7749 if (instruction->IsAnd()) {
7750 if (low_value == 0) {
7751 __ xorl(first_low, first_low);
7752 } else if (low_value != -1) {
7753 __ andl(first_low, low);
7754 }
7755 if (high_value == 0) {
7756 __ xorl(first_high, first_high);
7757 } else if (high_value != -1) {
7758 __ andl(first_high, high);
7759 }
7760 } else if (instruction->IsOr()) {
7761 if (low_value != 0) {
7762 __ orl(first_low, low);
7763 }
7764 if (high_value != 0) {
7765 __ orl(first_high, high);
7766 }
7767 } else {
7768 DCHECK(instruction->IsXor());
7769 if (low_value != 0) {
7770 __ xorl(first_low, low);
7771 }
7772 if (high_value != 0) {
7773 __ xorl(first_high, high);
7774 }
7775 }
7776 }
7777 }
7778 }
7779
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7780 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7781 HInstruction* instruction,
7782 Location out,
7783 uint32_t offset,
7784 Location maybe_temp,
7785 ReadBarrierOption read_barrier_option) {
7786 Register out_reg = out.AsRegister<Register>();
7787 if (read_barrier_option == kWithReadBarrier) {
7788 CHECK(kEmitCompilerReadBarrier);
7789 if (kUseBakerReadBarrier) {
7790 // Load with fast path based Baker's read barrier.
7791 // /* HeapReference<Object> */ out = *(out + offset)
7792 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7793 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7794 } else {
7795 // Load with slow path based read barrier.
7796 // Save the value of `out` into `maybe_temp` before overwriting it
7797 // in the following move operation, as we will need it for the
7798 // read barrier below.
7799 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7800 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7801 // /* HeapReference<Object> */ out = *(out + offset)
7802 __ movl(out_reg, Address(out_reg, offset));
7803 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7804 }
7805 } else {
7806 // Plain load with no read barrier.
7807 // /* HeapReference<Object> */ out = *(out + offset)
7808 __ movl(out_reg, Address(out_reg, offset));
7809 __ MaybeUnpoisonHeapReference(out_reg);
7810 }
7811 }
7812
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7813 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7814 HInstruction* instruction,
7815 Location out,
7816 Location obj,
7817 uint32_t offset,
7818 ReadBarrierOption read_barrier_option) {
7819 Register out_reg = out.AsRegister<Register>();
7820 Register obj_reg = obj.AsRegister<Register>();
7821 if (read_barrier_option == kWithReadBarrier) {
7822 CHECK(kEmitCompilerReadBarrier);
7823 if (kUseBakerReadBarrier) {
7824 // Load with fast path based Baker's read barrier.
7825 // /* HeapReference<Object> */ out = *(obj + offset)
7826 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7827 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7828 } else {
7829 // Load with slow path based read barrier.
7830 // /* HeapReference<Object> */ out = *(obj + offset)
7831 __ movl(out_reg, Address(obj_reg, offset));
7832 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7833 }
7834 } else {
7835 // Plain load with no read barrier.
7836 // /* HeapReference<Object> */ out = *(obj + offset)
7837 __ movl(out_reg, Address(obj_reg, offset));
7838 __ MaybeUnpoisonHeapReference(out_reg);
7839 }
7840 }
7841
// Loads a GC root from `address` into the register held by `root`.
//
// `instruction`         - the instruction the load (and any slow path) belongs to.
// `root`                - register location that receives the root.
// `address`             - memory operand the root is read from.
// `fixup_label`         - if non-null, bound immediately after the instruction that
//                         references `address` (the `movl`, or the `leal` in the
//                         non-Baker read barrier case).
// `read_barrier_option` - kWithReadBarrier selects one of the two read barrier
//                         flavors below; anything else emits a plain load.
void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  Register root_reg = root.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barrier are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // The 32-bit `movl` above is only a valid root load if a GcRoot is exactly
      // one compressed reference, i.e. 32 bits wide.
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
          instruction, root, /* unpoison_ref_before_marking= */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
      const int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
      // The entrypoint is null when the GC is not marking.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = address
      __ leal(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *address
    __ movl(root_reg, address);
    if (fixup_label != nullptr) {
      __ Bind(fixup_label);
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}
7908
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)7909 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7910 Location ref,
7911 Register obj,
7912 uint32_t offset,
7913 bool needs_null_check) {
7914 DCHECK(kEmitCompilerReadBarrier);
7915 DCHECK(kUseBakerReadBarrier);
7916
7917 // /* HeapReference<Object> */ ref = *(obj + offset)
7918 Address src(obj, offset);
7919 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7920 }
7921
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)7922 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7923 Location ref,
7924 Register obj,
7925 uint32_t data_offset,
7926 Location index,
7927 bool needs_null_check) {
7928 DCHECK(kEmitCompilerReadBarrier);
7929 DCHECK(kUseBakerReadBarrier);
7930
7931 static_assert(
7932 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7933 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7934 // /* HeapReference<Object> */ ref =
7935 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7936 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
7937 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7938 }
7939
// Emits a reference load from `src` into `ref` guarded by the fast-path Baker read
// barrier: the gray bit of `obj`'s lock word is tested first, the reference is then
// loaded, and the code branches to a marking slow path if the bit was set.
//
// `instruction`         - instruction the load belongs to (used for the implicit
//                         null check record and for the slow path).
// `ref`                 - register location receiving the loaded reference.
// `obj`                 - register holding the object whose lock word is tested.
// `src`                 - memory operand the reference is loaded from.
// `needs_null_check`    - when true, the lock word access is recorded through
//                         MaybeRecordImplicitNullCheck.
// `always_update_field` - when true, uses ReadBarrierMarkAndUpdateFieldSlowPathX86
//                         instead of ReadBarrierMarkSlowPathX86.
// `temp`                - must be non-null when `always_update_field` is true; the
//                         pointed-to register is handed to that slow path.
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  // Locate the single byte of the lock word that contains the read barrier state
  // bit, and the bit's position within that byte, so that a byte-sized test suffices.
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Fast path only: the slow paths above are created with
  // unpoison_ref_before_marking=true, so unpoisoning here happens after the branch.
  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}
8021
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8022 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8023 Location out,
8024 Location ref,
8025 Location obj,
8026 uint32_t offset,
8027 Location index) {
8028 DCHECK(kEmitCompilerReadBarrier);
8029
8030 // Insert a slow path based read barrier *after* the reference load.
8031 //
8032 // If heap poisoning is enabled, the unpoisoning of the loaded
8033 // reference will be carried out by the runtime within the slow
8034 // path.
8035 //
8036 // Note that `ref` currently does not get unpoisoned (when heap
8037 // poisoning is enabled), which is alright as the `ref` argument is
8038 // not used by the artReadBarrierSlow entry point.
8039 //
8040 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8041 SlowPathCode* slow_path = new (GetScopedAllocator())
8042 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8043 AddSlowPath(slow_path);
8044
8045 __ jmp(slow_path->GetEntryLabel());
8046 __ Bind(slow_path->GetExitLabel());
8047 }
8048
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8049 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8050 Location out,
8051 Location ref,
8052 Location obj,
8053 uint32_t offset,
8054 Location index) {
8055 if (kEmitCompilerReadBarrier) {
8056 // Baker's read barriers shall be handled by the fast path
8057 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8058 DCHECK(!kUseBakerReadBarrier);
8059 // If heap poisoning is enabled, unpoisoning will be taken care of
8060 // by the runtime within the slow path.
8061 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8062 } else if (kPoisonHeapReferences) {
8063 __ UnpoisonHeapReference(out.AsRegister<Register>());
8064 }
8065 }
8066
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8067 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8068 Location out,
8069 Location root) {
8070 DCHECK(kEmitCompilerReadBarrier);
8071
8072 // Insert a slow path based read barrier *after* the GC root load.
8073 //
8074 // Note that GC roots are not affected by heap poisoning, so we do
8075 // not need to do anything special for this here.
8076 SlowPathCode* slow_path =
8077 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8078 AddSlowPath(slow_path);
8079
8080 __ jmp(slow_path->GetEntryLabel());
8081 __ Bind(slow_path->GetExitLabel());
8082 }
8083
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8084 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8085 // Nothing to do, this should be removed during prepare for register allocator.
8086 LOG(FATAL) << "Unreachable";
8087 }
8088
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8089 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8090 // Nothing to do, this should be removed during prepare for register allocator.
8091 LOG(FATAL) << "Unreachable";
8092 }
8093
8094 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8095 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8096 LocationSummary* locations =
8097 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8098 locations->SetInAt(0, Location::RequiresRegister());
8099 }
8100
// Emits a packed switch as a cascade of compare-and-branch instructions.
//
// Case `i` has value `lower_bound + i` and branches to
// `switch_block->GetSuccessors()[i]`; values matching no case reach `default_block`.
// Cases are handled two per `cmpl`: one comparison against the higher of the two
// case values dispatches both the "less/below" and the "equal" outcome.
//
// `value_reg`     - register holding the value being switched on.
// `lower_bound`   - value of the first case.
// `num_entries`   - number of cases; must be greater than 2 (DCHECKed).
// `switch_block`  - block whose successors are the case targets.
// `default_block` - target for values outside the case range.
void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    // Signed comparison: anything below the first case value goes to the default
    // block, and the first case itself is dispatched right here.
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    // With a zero lower bound the unsigned `kBelow` condition is used, so a
    // negative value (huge when viewed as unsigned) never matches a case.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps.
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index + 1]. All smaller case
    // values were already dispatched, so this means value == case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // There are an odd number of entries. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value.
  // The jump is omitted when the default block is reached by falling through.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}
8148
VisitPackedSwitch(HPackedSwitch * switch_instr)8149 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8150 int32_t lower_bound = switch_instr->GetStartValue();
8151 uint32_t num_entries = switch_instr->GetNumEntries();
8152 LocationSummary* locations = switch_instr->GetLocations();
8153 Register value_reg = locations->InAt(0).AsRegister<Register>();
8154
8155 GenPackedSwitchWithCompares(value_reg,
8156 lower_bound,
8157 num_entries,
8158 switch_instr->GetBlock(),
8159 switch_instr->GetDefaultBlock());
8160 }
8161
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8162 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8163 LocationSummary* locations =
8164 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8165 locations->SetInAt(0, Location::RequiresRegister());
8166
8167 // Constant area pointer.
8168 locations->SetInAt(1, Location::RequiresRegister());
8169
8170 // And the temporary we need.
8171 locations->AddTemp(Location::RequiresRegister());
8172 }
8173
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8174 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8175 int32_t lower_bound = switch_instr->GetStartValue();
8176 uint32_t num_entries = switch_instr->GetNumEntries();
8177 LocationSummary* locations = switch_instr->GetLocations();
8178 Register value_reg = locations->InAt(0).AsRegister<Register>();
8179 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8180
8181 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8182 GenPackedSwitchWithCompares(value_reg,
8183 lower_bound,
8184 num_entries,
8185 switch_instr->GetBlock(),
8186 default_block);
8187 return;
8188 }
8189
8190 // Optimizing has a jump area.
8191 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8192 Register constant_area = locations->InAt(1).AsRegister<Register>();
8193
8194 // Remove the bias, if needed.
8195 if (lower_bound != 0) {
8196 __ leal(temp_reg, Address(value_reg, -lower_bound));
8197 value_reg = temp_reg;
8198 }
8199
8200 // Is the value in range?
8201 DCHECK_GE(num_entries, 1u);
8202 __ cmpl(value_reg, Immediate(num_entries - 1));
8203 __ j(kAbove, codegen_->GetLabelOf(default_block));
8204
8205 // We are in the range of the table.
8206 // Load (target-constant_area) from the jump table, indexing by the value.
8207 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8208
8209 // Compute the actual target address by adding in constant_area.
8210 __ addl(temp_reg, constant_area);
8211
8212 // And jump.
8213 __ jmp(temp_reg);
8214 }
8215
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8216 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8217 HX86ComputeBaseMethodAddress* insn) {
8218 LocationSummary* locations =
8219 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8220 locations->SetOut(Location::RequiresRegister());
8221 }
8222
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8223 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8224 HX86ComputeBaseMethodAddress* insn) {
8225 LocationSummary* locations = insn->GetLocations();
8226 Register reg = locations->Out().AsRegister<Register>();
8227
8228 // Generate call to next instruction.
8229 Label next_instruction;
8230 __ call(&next_instruction);
8231 __ Bind(&next_instruction);
8232
8233 // Remember this offset for later use with constant area.
8234 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8235
8236 // Grab the return address off the stack.
8237 __ popl(reg);
8238 }
8239
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8240 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8241 HX86LoadFromConstantTable* insn) {
8242 LocationSummary* locations =
8243 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8244
8245 locations->SetInAt(0, Location::RequiresRegister());
8246 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8247
8248 // If we don't need to be materialized, we only need the inputs to be set.
8249 if (insn->IsEmittedAtUseSite()) {
8250 return;
8251 }
8252
8253 switch (insn->GetType()) {
8254 case DataType::Type::kFloat32:
8255 case DataType::Type::kFloat64:
8256 locations->SetOut(Location::RequiresFpuRegister());
8257 break;
8258
8259 case DataType::Type::kInt32:
8260 locations->SetOut(Location::RequiresRegister());
8261 break;
8262
8263 default:
8264 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8265 }
8266 }
8267
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8268 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8269 if (insn->IsEmittedAtUseSite()) {
8270 return;
8271 }
8272
8273 LocationSummary* locations = insn->GetLocations();
8274 Location out = locations->Out();
8275 Register const_area = locations->InAt(0).AsRegister<Register>();
8276 HConstant *value = insn->GetConstant();
8277
8278 switch (insn->GetType()) {
8279 case DataType::Type::kFloat32:
8280 __ movss(out.AsFpuRegister<XmmRegister>(),
8281 codegen_->LiteralFloatAddress(
8282 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8283 break;
8284
8285 case DataType::Type::kFloat64:
8286 __ movsd(out.AsFpuRegister<XmmRegister>(),
8287 codegen_->LiteralDoubleAddress(
8288 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8289 break;
8290
8291 case DataType::Type::kInt32:
8292 __ movl(out.AsRegister<Register>(),
8293 codegen_->LiteralInt32Address(
8294 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8295 break;
8296
8297 default:
8298 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8299 }
8300 }
8301
8302 /**
8303 * Class to handle late fixup of offsets into constant area.
8304 */
8305 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8306 public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)8307 RIPFixup(CodeGeneratorX86& codegen,
8308 HX86ComputeBaseMethodAddress* base_method_address,
8309 size_t offset)
8310 : codegen_(&codegen),
8311 base_method_address_(base_method_address),
8312 offset_into_constant_area_(offset) {}
8313
8314 protected:
SetOffset(size_t offset)8315 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8316
8317 CodeGeneratorX86* codegen_;
8318 HX86ComputeBaseMethodAddress* base_method_address_;
8319
8320 private:
Process(const MemoryRegion & region,int pos)8321 void Process(const MemoryRegion& region, int pos) override {
8322 // Patch the correct offset for the instruction. The place to patch is the
8323 // last 4 bytes of the instruction.
8324 // The value to patch is the distance from the offset in the constant area
8325 // from the address computed by the HX86ComputeBaseMethodAddress instruction.
8326 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8327 int32_t relative_position =
8328 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8329
8330 // Patch in the right value.
8331 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8332 }
8333
8334 // Location in constant area that the fixup refers to.
8335 int32_t offset_into_constant_area_;
8336 };
8337
8338 /**
8339 * Class to handle late fixup of offsets to a jump table that will be created in the
8340 * constant area.
8341 */
8342 class JumpTableRIPFixup : public RIPFixup {
8343 public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)8344 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8345 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8346 switch_instr_(switch_instr) {}
8347
CreateJumpTable()8348 void CreateJumpTable() {
8349 X86Assembler* assembler = codegen_->GetAssembler();
8350
8351 // Ensure that the reference to the jump table has the correct offset.
8352 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8353 SetOffset(offset_in_constant_table);
8354
8355 // The label values in the jump table are computed relative to the
8356 // instruction addressing the constant area.
8357 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8358
8359 // Populate the jump table with the correct values for the jump table.
8360 int32_t num_entries = switch_instr_->GetNumEntries();
8361 HBasicBlock* block = switch_instr_->GetBlock();
8362 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8363 // The value that we want is the target offset - the position of the table.
8364 for (int32_t i = 0; i < num_entries; i++) {
8365 HBasicBlock* b = successors[i];
8366 Label* l = codegen_->GetLabelOf(b);
8367 DCHECK(l->IsBound());
8368 int32_t offset_to_block = l->Position() - relative_offset;
8369 assembler->AppendInt32(offset_to_block);
8370 }
8371 }
8372
8373 private:
8374 const HX86PackedSwitch* switch_instr_;
8375 };
8376
Finalize(CodeAllocator * allocator)8377 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8378 // Generate the constant area if needed.
8379 X86Assembler* assembler = GetAssembler();
8380
8381 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8382 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
8383 // byte values.
8384 assembler->Align(4, 0);
8385 constant_area_start_ = assembler->CodeSize();
8386
8387 // Populate any jump tables.
8388 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8389 jump_table->CreateJumpTable();
8390 }
8391
8392 // And now add the constant area to the generated code.
8393 assembler->AddConstantArea();
8394 }
8395
8396 // And finish up.
8397 CodeGenerator::Finalize(allocator);
8398 }
8399
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)8400 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8401 HX86ComputeBaseMethodAddress* method_base,
8402 Register reg) {
8403 AssemblerFixup* fixup =
8404 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8405 return Address(reg, kDummy32BitOffset, fixup);
8406 }
8407
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)8408 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8409 HX86ComputeBaseMethodAddress* method_base,
8410 Register reg) {
8411 AssemblerFixup* fixup =
8412 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8413 return Address(reg, kDummy32BitOffset, fixup);
8414 }
8415
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8416 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8417 HX86ComputeBaseMethodAddress* method_base,
8418 Register reg) {
8419 AssemblerFixup* fixup =
8420 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8421 return Address(reg, kDummy32BitOffset, fixup);
8422 }
8423
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8424 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8425 HX86ComputeBaseMethodAddress* method_base,
8426 Register reg) {
8427 AssemblerFixup* fixup =
8428 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8429 return Address(reg, kDummy32BitOffset, fixup);
8430 }
8431
Load32BitValue(Register dest,int32_t value)8432 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8433 if (value == 0) {
8434 __ xorl(dest, dest);
8435 } else {
8436 __ movl(dest, Immediate(value));
8437 }
8438 }
8439
Compare32BitValue(Register dest,int32_t value)8440 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8441 if (value == 0) {
8442 __ testl(dest, dest);
8443 } else {
8444 __ cmpl(dest, Immediate(value));
8445 }
8446 }
8447
GenerateIntCompare(Location lhs,Location rhs)8448 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8449 Register lhs_reg = lhs.AsRegister<Register>();
8450 GenerateIntCompare(lhs_reg, rhs);
8451 }
8452
GenerateIntCompare(Register lhs,Location rhs)8453 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8454 if (rhs.IsConstant()) {
8455 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8456 Compare32BitValue(lhs, value);
8457 } else if (rhs.IsStackSlot()) {
8458 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8459 } else {
8460 __ cmpl(lhs, rhs.AsRegister<Register>());
8461 }
8462 }
8463
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)8464 Address CodeGeneratorX86::ArrayAddress(Register obj,
8465 Location index,
8466 ScaleFactor scale,
8467 uint32_t data_offset) {
8468 return index.IsConstant() ?
8469 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8470 Address(obj, index.AsRegister<Register>(), scale, data_offset);
8471 }
8472
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)8473 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8474 Register reg,
8475 Register value) {
8476 // Create a fixup to be used to create and address the jump table.
8477 JumpTableRIPFixup* table_fixup =
8478 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8479
8480 // We have to populate the jump tables.
8481 fixups_to_jump_tables_.push_back(table_fixup);
8482
8483 // We want a scaled address, as we are extracting the correct offset from the table.
8484 return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
8485 }
8486
8487 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)8488 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8489 if (!target.IsValid()) {
8490 DCHECK_EQ(type, DataType::Type::kVoid);
8491 return;
8492 }
8493
8494 DCHECK_NE(type, DataType::Type::kVoid);
8495
8496 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
8497 if (target.Equals(return_loc)) {
8498 return;
8499 }
8500
8501 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
8502 // with the else branch.
8503 if (type == DataType::Type::kInt64) {
8504 HParallelMove parallel_move(GetGraph()->GetAllocator());
8505 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
8506 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
8507 GetMoveResolver()->EmitNativeCode(¶llel_move);
8508 } else {
8509 // Let the parallel move resolver take care of all of this.
8510 HParallelMove parallel_move(GetGraph()->GetAllocator());
8511 parallel_move.AddMove(return_loc, target, type, nullptr);
8512 GetMoveResolver()->EmitNativeCode(¶llel_move);
8513 }
8514 }
8515
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const8516 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
8517 const uint8_t* roots_data,
8518 const PatchInfo<Label>& info,
8519 uint64_t index_in_table) const {
8520 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8521 uintptr_t address =
8522 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8523 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8524 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8525 dchecked_integral_cast<uint32_t>(address);
8526 }
8527
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)8528 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8529 for (const PatchInfo<Label>& info : jit_string_patches_) {
8530 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8531 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8532 PatchJitRootUse(code, roots_data, info, index_in_table);
8533 }
8534
8535 for (const PatchInfo<Label>& info : jit_class_patches_) {
8536 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8537 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8538 PatchJitRootUse(code, roots_data, info, index_in_table);
8539 }
8540 }
8541
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8542 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8543 ATTRIBUTE_UNUSED) {
8544 LOG(FATAL) << "Unreachable";
8545 }
8546
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8547 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8548 ATTRIBUTE_UNUSED) {
8549 LOG(FATAL) << "Unreachable";
8550 }
8551
CpuHasAvxFeatureFlag()8552 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
8553 return codegen_->GetInstructionSetFeatures().HasAVX();
8554 }
CpuHasAvx2FeatureFlag()8555 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
8556 return codegen_->GetInstructionSetFeatures().HasAVX2();
8557 }
CpuHasAvxFeatureFlag()8558 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
8559 return codegen_->GetInstructionSetFeatures().HasAVX();
8560 }
CpuHasAvx2FeatureFlag()8561 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
8562 return codegen_->GetInstructionSetFeatures().HasAVX2();
8563 }
8564
8565 #undef __
8566
8567 } // namespace x86
8568 } // namespace art
8569