/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86 {

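// The managed calling convention on x86 passes the current ArtMethod* in EAX;
// GenerateFrameEntry() below spills it to the lowest slot of the frame (offset 0).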
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

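// Bit 10 (the C2 condition flag) of the x87 FPU status word; fprem keeps it
// set while the partial remainder reduction is still incomplete.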
static constexpr int kC2ConditionMask = 0x400;

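// A pseudo-register one past the last real core register (EDI), used only to
// reserve a slot for the return address in the core spill mask.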
static constexpr int kFakeReturnRegister = Register(8);

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
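// Byte offset of a quick entrypoint within the Thread object, using the
// 32-bit (kX86PointerSize) field layout.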
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()

class NullCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
};

class DivZeroCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
};

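// Slow path for the INT32_MIN / -1 corner case: the x86 idiv instruction
// raises a #DE fault for that input, so the result is materialized directly
// (the negated dividend for div, 0 for rem).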
class DivRemMinusOneSlowPathX86 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
      : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    if (is_div_) {
      __ negl(reg_);
    } else {
      __ movl(reg_, Immediate(0));
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; }

 private:
  Register reg_;
  bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
};

class BoundsCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }

    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<Register>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<Register>(), array_len);
      if (mirror::kUseStringCompression) {
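        // With string compression the length field also carries the
        // compression flag in its least significant bit; shift it out to
        // recover the actual character count.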
        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
      }
    }
    x86_codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimInt,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
};

class SuspendCheckSlowPathX86 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
};

class LoadStringSlowPathX86 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
    RestoreLiveRegisters(codegen, locations);

    // Store the resolved String to the BSS entry.
    Register method_address = locations->InAt(0).AsRegister<Register>();
    __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
            locations->Out().AsRegister<Register>());
    Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
    __ Bind(fixup_label);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
};

class LoadClassSlowPathX86 : public SlowPathCode {
 public:
  LoadClassSlowPathX86(HLoadClass* cls,
                       HInstruction* at,
                       uint32_t dex_pc,
                       bool do_clinit)
      : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    dex::TypeIndex type_index = cls_->GetTypeIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
    x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
                                          : kQuickInitializeType,
                               instruction_,
                               dex_pc_,
                               this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    Location out = locations->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_codegen->Move32(out, Location::RegisterLocation(EAX));
    }
    RestoreLiveRegisters(codegen, locations);
    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
    if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
      DCHECK(out.IsValid());
      Register method_address = locations->InAt(0).AsRegister<Register>();
      __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset),
              locations->Out().AsRegister<Register>());
      Label* fixup_label = x86_codegen->NewTypeBssEntryPatch(cls_);
      __ Bind(fixup_label);
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The dex PC of `at`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};

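// Slow path shared by HInstanceOf and HCheckCast for the cases the inline
// check cannot decide: it calls into the runtime, which either computes the
// instanceof result (returned in EAX) or throws ClassCastException.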
class TypeCheckSlowPathX86 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->EmitParallelMoves(locations->InAt(0),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                   Primitive::kPrimNot,
                                   locations->InAt(1),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                   Primitive::kPrimNot);
    if (instruction_->IsInstanceOf()) {
      x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
      }
      RestoreLiveRegisters(codegen, locations);

      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; }
  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};

class DeoptimizationSlowPathX86 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->Load32BitValue(
        calling_convention.GetRegisterAt(0),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};

class ArraySetSlowPathX86 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
                             Location ref,
                             bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
                                           Location ref,
                                           Register obj,
                                           const Address& field_addr,
                                           bool unpoison_ref_before_marking,
                                           Register temp)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp_(temp) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86";
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp_, ref_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp_, ref_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates it before us, but that is OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save EAX beforehand, and move the
    // expected value (stored in `temp_`) into EAX.
    __ pushl(EAX);
    __ movl(EAX, temp_);

    // Convenience aliases.
    Register base = obj_;
    Register expected = EAX;
    Register value = ref_reg;

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register. This way, poisoning
        // `value` won't invalidate `base`.
        value = temp_;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr_, value);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(value);
      }
      // No need to unpoison `expected` (EAX), as it will be overwritten below.
    }

    // Restore EAX.
    __ popl(EAX);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          Primitive::kPrimNot,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          Primitive::kPrimNot,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition and FP condition to x86 name.
inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    // Signed to unsigned, and FP to x86 name.
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    // Unsigned remain unchanged.
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << XmmRegister(reg);
}

size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
  return kX86WordSize;
}

size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
  return kX86WordSize;
}

size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
  }
  return GetFloatingPointSpillSlotSize();
}

size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
  }
  return GetFloatingPointSpillSlotSize();
}

void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                     HInstruction* instruction,
                                     uint32_t dex_pc,
                                     SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
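  // Quick entrypoints live in the Thread object; on x86 the FS segment
  // register points at the current Thread, so a single fs-relative indirect
  // call reaches the entrypoint.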
  __ fs()->call(Address::Absolute(entry_point_offset));
}

CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const X86InstructionSetFeatures& isa_features,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetArena(), this),
      assembler_(graph->GetArena()),
      isa_features_(isa_features),
      boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}

InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86Core(static_cast<int>(reg));
}

void CodeGeneratorX86::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check =
      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
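    // Implicit stack overflow check: read a word one reserved region below
    // ESP. If that lands on the unmapped guard page, the resulting fault is
    // turned into a StackOverflowError by the runtime.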
    __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushl(reg);
      __ cfi().AdjustCFAOffset(kX86WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  int adjust = GetFrameSize() - FrameEntrySpillSize();
  __ subl(ESP, Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  // Save the current method if we need it. Note that we do not
  // do this in HCurrentMethod, as the instruction might have been removed
  // in the SSA graph.
  if (RequiresCurrentMethod()) {
    __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
  }

  if (GetGraph()->HasShouldDeoptimizeFlag()) {
    // Initialize should_deoptimize flag to 0.
    __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
  }
}

void CodeGeneratorX86::GenerateFrameExit() {
  __ cfi().RememberState();
  if (!HasEmptyFrame()) {
    int adjust = GetFrameSize() - FrameEntrySpillSize();
    __ addl(ESP, Immediate(adjust));
    __ cfi().AdjustCFAOffset(-adjust);

    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ popl(reg);
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
        __ cfi().Restore(DWARFReg(reg));
      }
    }
  }
  __ ret();
  __ cfi().RestoreState();
  __ cfi().DefCFAOffset(GetFrameSize());
}

void CodeGeneratorX86::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      return Location::RegisterLocation(EAX);

    case Primitive::kPrimLong:
      return Location::RegisterPairLocation(EAX, EDX);

    case Primitive::kPrimVoid:
      return Location::NoLocation();

    case Primitive::kPrimDouble:
    case Primitive::kPrimFloat:
      return Location::FpuRegisterLocation(XMM0);
  }

  UNREACHABLE();
}

Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
  return Location::RegisterLocation(kMethodRegisterArgument);
}

Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      uint32_t index = gp_index_++;
      stack_index_++;
      if (index < calling_convention.GetNumberOfRegisters()) {
        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
      }
    }

    case Primitive::kPrimLong: {
      uint32_t index = gp_index_;
      gp_index_ += 2;
      stack_index_ += 2;
      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
        X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
            calling_convention.GetRegisterPairAt(index));
        return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
      }
    }

    case Primitive::kPrimFloat: {
      uint32_t index = float_index_++;
      stack_index_++;
      if (index < calling_convention.GetNumberOfFpuRegisters()) {
        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
      } else {
        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
      }
    }

    case Primitive::kPrimDouble: {
      uint32_t index = float_index_++;
      stack_index_ += 2;
      if (index < calling_convention.GetNumberOfFpuRegisters()) {
        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
      } else {
        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
      }
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected parameter type " << type;
      break;
  }
  return Location::NoLocation();
}

void CodeGeneratorX86::Move32(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;
  }
  if (destination.IsRegister()) {
    if (source.IsRegister()) {
      __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
    } else if (source.IsFpuRegister()) {
      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
    } else {
      DCHECK(source.IsStackSlot());
      __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
    }
  } else if (destination.IsFpuRegister()) {
    if (source.IsRegister()) {
      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
    } else if (source.IsFpuRegister()) {
      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    } else {
      DCHECK(source.IsStackSlot());
      __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    }
  } else {
    DCHECK(destination.IsStackSlot()) << destination;
    if (source.IsRegister()) {
      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
    } else if (source.IsFpuRegister()) {
      __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      int32_t value = GetInt32ValueOf(constant);
      __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
    } else {
      DCHECK(source.IsStackSlot());
      __ pushl(Address(ESP, source.GetStackIndex()));
      __ popl(Address(ESP, destination.GetStackIndex()));
    }
  }
}

void CodeGeneratorX86::Move64(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;
  }
  if (destination.IsRegisterPair()) {
    if (source.IsRegisterPair()) {
      EmitParallelMoves(
          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
          Primitive::kPrimInt,
          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
          Primitive::kPrimInt);
    } else if (source.IsFpuRegister()) {
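      // Split the 64-bit XMM value into the register pair: movd copies the
      // low 32 bits, then a logical right shift by 32 exposes the high half.
      // Note that this clobbers the source XMM register.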
      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
      __ psrlq(src_reg, Immediate(32));
      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
    } else {
      // No conflict possible, so just do the moves.
      DCHECK(source.IsDoubleStackSlot());
      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
      __ movl(destination.AsRegisterPairHigh<Register>(),
              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
    }
  } else if (destination.IsFpuRegister()) {
    if (source.IsFpuRegister()) {
      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    } else if (source.IsDoubleStackSlot()) {
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    } else if (source.IsRegisterPair()) {
      size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
      // Create stack space for 2 elements.
      __ subl(ESP, Immediate(2 * elem_size));
      __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
      __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
      // And remove the temporary stack space we allocated.
      __ addl(ESP, Immediate(2 * elem_size));
    } else {
      LOG(FATAL) << "Unimplemented";
    }
  } else {
    DCHECK(destination.IsDoubleStackSlot()) << destination;
    if (source.IsRegisterPair()) {
      // No conflict possible, so just do the moves.
      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
              source.AsRegisterPairHigh<Register>());
    } else if (source.IsFpuRegister()) {
      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
      int64_t value = GetInt64ValueOf(constant);
      __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
              Immediate(High32Bits(value)));
    } else {
      DCHECK(source.IsDoubleStackSlot()) << source;
      EmitParallelMoves(
          Location::StackSlot(source.GetStackIndex()),
          Location::StackSlot(destination.GetStackIndex()),
          Primitive::kPrimInt,
          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
          Primitive::kPrimInt);
    }
  }
}

void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  __ movl(location.AsRegister<Register>(), Immediate(value));
}

void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
  HParallelMove move(GetGraph()->GetArena());
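  // A long that is neither a constant nor in an XMM register occupies two
  // 32-bit locations; split it into two int moves so the resolver can handle
  // each half (and any overlap between them) independently.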
1331 if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) {
1332 move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr);
1333 move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr);
1334 } else {
1335 move.AddMove(src, dst, dst_type, nullptr);
1336 }
1337 GetMoveResolver()->EmitNativeCode(&move);
1338 }
1339
AddLocationAsTemp(Location location,LocationSummary * locations)1340 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1341 if (location.IsRegister()) {
1342 locations->AddTemp(location);
1343 } else if (location.IsRegisterPair()) {
1344 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1345 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1346 } else {
1347 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1348 }
1349 }
1350
1351 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1352 DCHECK(!successor->IsExitBlock());
1353
1354 HBasicBlock* block = got->GetBlock();
1355 HInstruction* previous = got->GetPrevious();
1356
1357 HLoopInformation* info = block->GetLoopInformation();
1358 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1359 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1360 return;
1361 }
1362
1363 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1364 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1365 }
1366 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1367 __ jmp(codegen_->GetLabelOf(successor));
1368 }
1369 }
1370
1371 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1372 got->SetLocations(nullptr);
1373 }
1374
1375 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1376 HandleGoto(got, got->GetSuccessor());
1377 }
1378
1379 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1380 try_boundary->SetLocations(nullptr);
1381 }
1382
1383 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1384 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1385 if (!successor->IsExitBlock()) {
1386 HandleGoto(try_boundary, successor);
1387 }
1388 }
1389
1390 void LocationsBuilderX86::VisitExit(HExit* exit) {
1391 exit->SetLocations(nullptr);
1392 }
1393
1394 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1395 }
1396
1397 template<class LabelType>
1398 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1399 LabelType* true_label,
1400 LabelType* false_label) {
1401 if (cond->IsFPConditionTrueIfNaN()) {
1402 __ j(kUnordered, true_label);
1403 } else if (cond->IsFPConditionFalseIfNaN()) {
1404 __ j(kUnordered, false_label);
1405 }
1406 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1407 }
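
// Illustrative sketch (not emitted verbatim): ucomiss/ucomisd report an
// unordered result (PF = 1) when either operand is NaN, and kUnordered maps
// to jp. For a condition that is false on NaN, the method above emits:
//   jp  false_label   // NaN => false
//   jb  true_label    // e.g. kBelow for an ordered less-than
// after the caller has issued the ucomiss/ucomisd (see GenerateFPCompare).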
1408
1409 template<class LabelType>
1410 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1411 LabelType* true_label,
1412 LabelType* false_label) {
1413 LocationSummary* locations = cond->GetLocations();
1414 Location left = locations->InAt(0);
1415 Location right = locations->InAt(1);
1416 IfCondition if_cond = cond->GetCondition();
1417
1418 Register left_high = left.AsRegisterPairHigh<Register>();
1419 Register left_low = left.AsRegisterPairLow<Register>();
1420 IfCondition true_high_cond = if_cond;
1421 IfCondition false_high_cond = cond->GetOppositeCondition();
1422 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1423
1424 // Set the conditions for the test, remembering that == needs to be
1425 // decided using the low words.
1426 switch (if_cond) {
1427 case kCondEQ:
1428 case kCondNE:
1429 // Nothing to do.
1430 break;
1431 case kCondLT:
1432 false_high_cond = kCondGT;
1433 break;
1434 case kCondLE:
1435 true_high_cond = kCondLT;
1436 break;
1437 case kCondGT:
1438 false_high_cond = kCondLT;
1439 break;
1440 case kCondGE:
1441 true_high_cond = kCondGT;
1442 break;
1443 case kCondB:
1444 false_high_cond = kCondA;
1445 break;
1446 case kCondBE:
1447 true_high_cond = kCondB;
1448 break;
1449 case kCondA:
1450 false_high_cond = kCondB;
1451 break;
1452 case kCondAE:
1453 true_high_cond = kCondA;
1454 break;
1455 }
1456
1457 if (right.IsConstant()) {
1458 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1459 int32_t val_high = High32Bits(value);
1460 int32_t val_low = Low32Bits(value);
1461
1462 codegen_->Compare32BitValue(left_high, val_high);
1463 if (if_cond == kCondNE) {
1464 __ j(X86Condition(true_high_cond), true_label);
1465 } else if (if_cond == kCondEQ) {
1466 __ j(X86Condition(false_high_cond), false_label);
1467 } else {
1468 __ j(X86Condition(true_high_cond), true_label);
1469 __ j(X86Condition(false_high_cond), false_label);
1470 }
1471 // Must be equal high, so compare the lows.
1472 codegen_->Compare32BitValue(left_low, val_low);
1473 } else if (right.IsRegisterPair()) {
1474 Register right_high = right.AsRegisterPairHigh<Register>();
1475 Register right_low = right.AsRegisterPairLow<Register>();
1476
1477 __ cmpl(left_high, right_high);
1478 if (if_cond == kCondNE) {
1479 __ j(X86Condition(true_high_cond), true_label);
1480 } else if (if_cond == kCondEQ) {
1481 __ j(X86Condition(false_high_cond), false_label);
1482 } else {
1483 __ j(X86Condition(true_high_cond), true_label);
1484 __ j(X86Condition(false_high_cond), false_label);
1485 }
1486 // Must be equal high, so compare the lows.
1487 __ cmpl(left_low, right_low);
1488 } else {
1489 DCHECK(right.IsDoubleStackSlot());
1490 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1491 if (if_cond == kCondNE) {
1492 __ j(X86Condition(true_high_cond), true_label);
1493 } else if (if_cond == kCondEQ) {
1494 __ j(X86Condition(false_high_cond), false_label);
1495 } else {
1496 __ j(X86Condition(true_high_cond), true_label);
1497 __ j(X86Condition(false_high_cond), false_label);
1498 }
1499 // Must be equal high, so compare the lows.
1500 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1501 }
1502 // The last comparison might be unsigned.
1503 __ j(final_condition, true_label);
1504 }
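
// Worked example (illustrative, not emitted verbatim): for a signed long
// compare `left < right` with both operands in register pairs, the logic
// above expands to roughly:
//   cmpl left_high, right_high
//   jl   true_label    // true_high_cond  == kCondLT
//   jg   false_label   // false_high_cond == kCondGT
//   cmpl left_low, right_low
//   jb   true_label    // final_condition: the low words compare unsigned
// The equal-high case falls through to the unsigned low-word compare.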
1505
1506 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1507 Location rhs,
1508 HInstruction* insn,
1509 bool is_double) {
1510 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1511 if (is_double) {
1512 if (rhs.IsFpuRegister()) {
1513 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1514 } else if (const_area != nullptr) {
1515 DCHECK(const_area->IsEmittedAtUseSite());
1516 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1517 codegen_->LiteralDoubleAddress(
1518 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1519 const_area->GetBaseMethodAddress(),
1520 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1521 } else {
1522 DCHECK(rhs.IsDoubleStackSlot());
1523 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1524 }
1525 } else {
1526 if (rhs.IsFpuRegister()) {
1527 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1528 } else if (const_area != nullptr) {
1529 DCHECK(const_area->IsEmittedAtUseSite());
1530 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1531 codegen_->LiteralFloatAddress(
1532 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1533 const_area->GetBaseMethodAddress(),
1534 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1535 } else {
1536 DCHECK(rhs.IsStackSlot());
1537 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1538 }
1539 }
1540 }
1541
1542 template<class LabelType>
1543 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1544 LabelType* true_target_in,
1545 LabelType* false_target_in) {
1546 // Generated branching requires both targets to be explicit. If either of the
1547 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1548 LabelType fallthrough_target;
1549 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1550 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1551
1552 LocationSummary* locations = condition->GetLocations();
1553 Location left = locations->InAt(0);
1554 Location right = locations->InAt(1);
1555
1556 Primitive::Type type = condition->InputAt(0)->GetType();
1557 switch (type) {
1558 case Primitive::kPrimLong:
1559 GenerateLongComparesAndJumps(condition, true_target, false_target);
1560 break;
1561 case Primitive::kPrimFloat:
1562 GenerateFPCompare(left, right, condition, false);
1563 GenerateFPJumps(condition, true_target, false_target);
1564 break;
1565 case Primitive::kPrimDouble:
1566 GenerateFPCompare(left, right, condition, true);
1567 GenerateFPJumps(condition, true_target, false_target);
1568 break;
1569 default:
1570 LOG(FATAL) << "Unexpected compare type " << type;
1571 }
1572
1573 if (false_target != &fallthrough_target) {
1574 __ jmp(false_target);
1575 }
1576
1577 if (fallthrough_target.IsLinked()) {
1578 __ Bind(&fallthrough_target);
1579 }
1580 }
1581
1582 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1583 // Moves may affect the eflags register (materializing zero uses xorl), so the
1584 // eflags are only trusted if `cond` immediately precedes `branch`. Materialized
1585 // long/FP conditions cannot reuse the eflags because of their complex branching.
1586 return cond->IsCondition() &&
1587 cond->GetNext() == branch &&
1588 cond->InputAt(0)->GetType() != Primitive::kPrimLong &&
1589 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1590 }
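
// Example (illustrative): a materialized integer condition emits roughly
//   xorl ecx, ecx
//   cmpl eax, ebx
//   setl cl
// and setcc leaves the eflags untouched, so a branch that directly follows
// the materialization can reuse the flags from the cmpl instead of
// re-testing the boolean in `cl`.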
1591
1592 template<class LabelType>
1593 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1594 size_t condition_input_index,
1595 LabelType* true_target,
1596 LabelType* false_target) {
1597 HInstruction* cond = instruction->InputAt(condition_input_index);
1598
1599 if (true_target == nullptr && false_target == nullptr) {
1600 // Nothing to do. The code always falls through.
1601 return;
1602 } else if (cond->IsIntConstant()) {
1603 // Constant condition, statically compared against "true" (integer value 1).
1604 if (cond->AsIntConstant()->IsTrue()) {
1605 if (true_target != nullptr) {
1606 __ jmp(true_target);
1607 }
1608 } else {
1609 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1610 if (false_target != nullptr) {
1611 __ jmp(false_target);
1612 }
1613 }
1614 return;
1615 }
1616
1617 // The following code generates these patterns:
1618 // (1) true_target == nullptr && false_target != nullptr
1619 // - opposite condition true => branch to false_target
1620 // (2) true_target != nullptr && false_target == nullptr
1621 // - condition true => branch to true_target
1622 // (3) true_target != nullptr && false_target != nullptr
1623 // - condition true => branch to true_target
1624 // - branch to false_target
1625 if (IsBooleanValueOrMaterializedCondition(cond)) {
1626 if (AreEflagsSetFrom(cond, instruction)) {
1627 if (true_target == nullptr) {
1628 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1629 } else {
1630 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1631 }
1632 } else {
1633 // Materialized condition, compare against 0.
1634 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1635 if (lhs.IsRegister()) {
1636 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1637 } else {
1638 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1639 }
1640 if (true_target == nullptr) {
1641 __ j(kEqual, false_target);
1642 } else {
1643 __ j(kNotEqual, true_target);
1644 }
1645 }
1646 } else {
1647 // Condition has not been materialized, use its inputs as the comparison and
1648 // its condition as the branch condition.
1649 HCondition* condition = cond->AsCondition();
1650
1651 // If this is a long or FP comparison that has been folded into
1652 // the HCondition, generate the comparison directly.
1653 Primitive::Type type = condition->InputAt(0)->GetType();
1654 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1655 GenerateCompareTestAndBranch(condition, true_target, false_target);
1656 return;
1657 }
1658
1659 Location lhs = condition->GetLocations()->InAt(0);
1660 Location rhs = condition->GetLocations()->InAt(1);
1661 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1662 codegen_->GenerateIntCompare(lhs, rhs);
1663 if (true_target == nullptr) {
1664 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1665 } else {
1666 __ j(X86Condition(condition->GetCondition()), true_target);
1667 }
1668 }
1669
1670 // If neither branch falls through (case 3), the conditional branch to `true_target`
1671 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1672 if (true_target != nullptr && false_target != nullptr) {
1673 __ jmp(false_target);
1674 }
1675 }
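
// Example (illustrative): for `if (x)` where `x` is a materialized boolean
// held in a register and the true-successor is the fallthrough block,
// pattern (1) above emits just:
//   testl eax, eax
//   jz    false_target
// Register and label names are placeholders for exposition.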
1676
1677 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1678 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1679 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1680 locations->SetInAt(0, Location::Any());
1681 }
1682 }
1683
1684 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1685 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1686 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1687 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1688 nullptr : codegen_->GetLabelOf(true_successor);
1689 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1690 nullptr : codegen_->GetLabelOf(false_successor);
1691 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1692 }
1693
1694 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1695 LocationSummary* locations = new (GetGraph()->GetArena())
1696 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1697 InvokeRuntimeCallingConvention calling_convention;
1698 RegisterSet caller_saves = RegisterSet::Empty();
1699 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1700 locations->SetCustomSlowPathCallerSaves(caller_saves);
1701 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1702 locations->SetInAt(0, Location::Any());
1703 }
1704 }
1705
1706 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1707 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1708 GenerateTestAndBranch<Label>(deoptimize,
1709 /* condition_input_index */ 0,
1710 slow_path->GetEntryLabel(),
1711 /* false_target */ nullptr);
1712 }
1713
1714 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1715 LocationSummary* locations = new (GetGraph()->GetArena())
1716 LocationSummary(flag, LocationSummary::kNoCall);
1717 locations->SetOut(Location::RequiresRegister());
1718 }
1719
1720 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1721 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1722 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1723 }
1724
1725 static bool SelectCanUseCMOV(HSelect* select) {
1726 // There are no conditional move instructions for XMMs.
1727 if (Primitive::IsFloatingPointType(select->GetType())) {
1728 return false;
1729 }
1730
1731 // An FP condition doesn't generate the single CC that we need.
1732 // In 32-bit mode, a long condition doesn't generate a single CC either.
1733 HInstruction* condition = select->GetCondition();
1734 if (condition->IsCondition()) {
1735 Primitive::Type compare_type = condition->InputAt(0)->GetType();
1736 if (compare_type == Primitive::kPrimLong ||
1737 Primitive::IsFloatingPointType(compare_type)) {
1738 return false;
1739 }
1740 }
1741
1742 // We can generate a CMOV for this Select.
1743 return true;
1744 }
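
// Example (illustrative): `x = (a > b) ? p : q` on 32-bit ints qualifies and
// lowers to a branchless sequence along the lines of:
//   cmpl  a, b          // a single compare sets the condition codes
//   cmovg out, p        // out already holds q, the false value
// Long and FP compares are rejected above because they do not produce one
// condition code for cmov to consume.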
1745
1746 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1747 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1748 if (Primitive::IsFloatingPointType(select->GetType())) {
1749 locations->SetInAt(0, Location::RequiresFpuRegister());
1750 locations->SetInAt(1, Location::Any());
1751 } else {
1752 locations->SetInAt(0, Location::RequiresRegister());
1753 if (SelectCanUseCMOV(select)) {
1754 if (select->InputAt(1)->IsConstant()) {
1755 // Cmov can't handle a constant value.
1756 locations->SetInAt(1, Location::RequiresRegister());
1757 } else {
1758 locations->SetInAt(1, Location::Any());
1759 }
1760 } else {
1761 locations->SetInAt(1, Location::Any());
1762 }
1763 }
1764 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1765 locations->SetInAt(2, Location::RequiresRegister());
1766 }
1767 locations->SetOut(Location::SameAsFirstInput());
1768 }
1769
1770 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1771 LocationSummary* locations = select->GetLocations();
1772 DCHECK(locations->InAt(0).Equals(locations->Out()));
1773 if (SelectCanUseCMOV(select)) {
1774 // If both the condition and the source types are integer, we can generate
1775 // a CMOV to implement Select.
1776
1777 HInstruction* select_condition = select->GetCondition();
1778 Condition cond = kNotEqual;
1779
1780 // Figure out how to test the 'condition'.
1781 if (select_condition->IsCondition()) {
1782 HCondition* condition = select_condition->AsCondition();
1783 if (!condition->IsEmittedAtUseSite()) {
1784 // This was a previously materialized condition.
1785 // Can we use the existing condition code?
1786 if (AreEflagsSetFrom(condition, select)) {
1787 // Materialization was the previous instruction. Condition codes are right.
1788 cond = X86Condition(condition->GetCondition());
1789 } else {
1790 // No, we have to recreate the condition code.
1791 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1792 __ testl(cond_reg, cond_reg);
1793 }
1794 } else {
1795 // We can't handle FP or long here.
1796 DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
1797 DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
1798 LocationSummary* cond_locations = condition->GetLocations();
1799 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1800 cond = X86Condition(condition->GetCondition());
1801 }
1802 } else {
1803 // Must be a Boolean condition, which needs to be compared to 0.
1804 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1805 __ testl(cond_reg, cond_reg);
1806 }
1807
1808 // If the condition is true, overwrite the output, which already contains false.
1809 Location false_loc = locations->InAt(0);
1810 Location true_loc = locations->InAt(1);
1811 if (select->GetType() == Primitive::kPrimLong) {
1812 // 64 bit conditional move.
1813 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1814 Register false_low = false_loc.AsRegisterPairLow<Register>();
1815 if (true_loc.IsRegisterPair()) {
1816 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1817 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1818 } else {
1819 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1820 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1821 }
1822 } else {
1823 // 32 bit conditional move.
1824 Register false_reg = false_loc.AsRegister<Register>();
1825 if (true_loc.IsRegister()) {
1826 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1827 } else {
1828 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1829 }
1830 }
1831 } else {
1832 NearLabel false_target;
1833 GenerateTestAndBranch<NearLabel>(
1834 select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
1835 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1836 __ Bind(&false_target);
1837 }
1838 }
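
// Illustrative 64-bit case: selecting a long on a materialized boolean `c`
// emits roughly:
//   testl  c, c
//   cmovnz false_high, true_high
//   cmovnz false_low,  true_low
// Two 32-bit cmovs stand in for the missing 64-bit conditional move; both
// test the same condition code, so either both halves move or neither does.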
1839
1840 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1841 new (GetGraph()->GetArena()) LocationSummary(info);
1842 }
1843
1844 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1845 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1846 }
1847
1848 void CodeGeneratorX86::GenerateNop() {
1849 __ nop();
1850 }
1851
1852 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1853 LocationSummary* locations =
1854 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1855 // Handle the long/FP comparisons made in instruction simplification.
1856 switch (cond->InputAt(0)->GetType()) {
1857 case Primitive::kPrimLong: {
1858 locations->SetInAt(0, Location::RequiresRegister());
1859 locations->SetInAt(1, Location::Any());
1860 if (!cond->IsEmittedAtUseSite()) {
1861 locations->SetOut(Location::RequiresRegister());
1862 }
1863 break;
1864 }
1865 case Primitive::kPrimFloat:
1866 case Primitive::kPrimDouble: {
1867 locations->SetInAt(0, Location::RequiresFpuRegister());
1868 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1869 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1870 } else if (cond->InputAt(1)->IsConstant()) {
1871 locations->SetInAt(1, Location::RequiresFpuRegister());
1872 } else {
1873 locations->SetInAt(1, Location::Any());
1874 }
1875 if (!cond->IsEmittedAtUseSite()) {
1876 locations->SetOut(Location::RequiresRegister());
1877 }
1878 break;
1879 }
1880 default:
1881 locations->SetInAt(0, Location::RequiresRegister());
1882 locations->SetInAt(1, Location::Any());
1883 if (!cond->IsEmittedAtUseSite()) {
1884 // We need a byte register.
1885 locations->SetOut(Location::RegisterLocation(ECX));
1886 }
1887 break;
1888 }
1889 }
1890
1891 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1892 if (cond->IsEmittedAtUseSite()) {
1893 return;
1894 }
1895
1896 LocationSummary* locations = cond->GetLocations();
1897 Location lhs = locations->InAt(0);
1898 Location rhs = locations->InAt(1);
1899 Register reg = locations->Out().AsRegister<Register>();
1900 NearLabel true_label, false_label;
1901
1902 switch (cond->InputAt(0)->GetType()) {
1903 default: {
1904 // Integer case.
1905
1906 // Clear output register: setb only sets the low byte.
1907 __ xorl(reg, reg);
1908 codegen_->GenerateIntCompare(lhs, rhs);
1909 __ setb(X86Condition(cond->GetCondition()), reg);
1910 return;
1911 }
1912 case Primitive::kPrimLong:
1913 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
1914 break;
1915 case Primitive::kPrimFloat:
1916 GenerateFPCompare(lhs, rhs, cond, false);
1917 GenerateFPJumps(cond, &true_label, &false_label);
1918 break;
1919 case Primitive::kPrimDouble:
1920 GenerateFPCompare(lhs, rhs, cond, true);
1921 GenerateFPJumps(cond, &true_label, &false_label);
1922 break;
1923 }
1924
1925 // Convert the jumps into the result.
1926 NearLabel done_label;
1927
1928 // False case: result = 0.
1929 __ Bind(&false_label);
1930 __ xorl(reg, reg);
1931 __ jmp(&done_label);
1932
1933 // True case: result = 1.
1934 __ Bind(&true_label);
1935 __ movl(reg, Immediate(1));
1936 __ Bind(&done_label);
1937 }
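
// Example (illustrative): materializing `x == y` for ints therefore produces
//   xorl ecx, ecx      // cleared first: setcc writes only the low byte
//   cmpl x, y
//   sete cl
// while long/FP conditions fall back to the jump skeleton above, storing 0
// at false_label and 1 at true_label.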
1938
1939 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
1940 HandleCondition(comp);
1941 }
1942
1943 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
1944 HandleCondition(comp);
1945 }
1946
1947 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
1948 HandleCondition(comp);
1949 }
1950
1951 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
1952 HandleCondition(comp);
1953 }
1954
1955 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
1956 HandleCondition(comp);
1957 }
1958
1959 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
1960 HandleCondition(comp);
1961 }
1962
1963 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1964 HandleCondition(comp);
1965 }
1966
1967 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1968 HandleCondition(comp);
1969 }
1970
1971 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
1972 HandleCondition(comp);
1973 }
1974
1975 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
1976 HandleCondition(comp);
1977 }
1978
1979 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1980 HandleCondition(comp);
1981 }
1982
1983 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1984 HandleCondition(comp);
1985 }
1986
1987 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
1988 HandleCondition(comp);
1989 }
1990
1991 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
1992 HandleCondition(comp);
1993 }
1994
1995 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
1996 HandleCondition(comp);
1997 }
1998
1999 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2000 HandleCondition(comp);
2001 }
2002
2003 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2004 HandleCondition(comp);
2005 }
2006
2007 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2008 HandleCondition(comp);
2009 }
2010
2011 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2012 HandleCondition(comp);
2013 }
2014
2015 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2016 HandleCondition(comp);
2017 }
2018
2019 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2020 LocationSummary* locations =
2021 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2022 locations->SetOut(Location::ConstantLocation(constant));
2023 }
2024
2025 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2026 // Will be generated at use site.
2027 }
2028
2029 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2030 LocationSummary* locations =
2031 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2032 locations->SetOut(Location::ConstantLocation(constant));
2033 }
2034
2035 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2036 // Will be generated at use site.
2037 }
2038
2039 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2040 LocationSummary* locations =
2041 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2042 locations->SetOut(Location::ConstantLocation(constant));
2043 }
2044
2045 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2046 // Will be generated at use site.
2047 }
2048
2049 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2050 LocationSummary* locations =
2051 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2052 locations->SetOut(Location::ConstantLocation(constant));
2053 }
2054
2055 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2056 // Will be generated at use site.
2057 }
2058
2059 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2060 LocationSummary* locations =
2061 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2062 locations->SetOut(Location::ConstantLocation(constant));
2063 }
2064
2065 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2066 // Will be generated at use site.
2067 }
2068
2069 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2070 constructor_fence->SetLocations(nullptr);
2071 }
2072
2073 void InstructionCodeGeneratorX86::VisitConstructorFence(
2074 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2075 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2076 }
2077
2078 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2079 memory_barrier->SetLocations(nullptr);
2080 }
2081
2082 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2083 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2084 }
2085
2086 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2087 ret->SetLocations(nullptr);
2088 }
2089
2090 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2091 codegen_->GenerateFrameExit();
2092 }
2093
2094 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2095 LocationSummary* locations =
2096 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2097 switch (ret->InputAt(0)->GetType()) {
2098 case Primitive::kPrimBoolean:
2099 case Primitive::kPrimByte:
2100 case Primitive::kPrimChar:
2101 case Primitive::kPrimShort:
2102 case Primitive::kPrimInt:
2103 case Primitive::kPrimNot:
2104 locations->SetInAt(0, Location::RegisterLocation(EAX));
2105 break;
2106
2107 case Primitive::kPrimLong:
2108 locations->SetInAt(
2109 0, Location::RegisterPairLocation(EAX, EDX));
2110 break;
2111
2112 case Primitive::kPrimFloat:
2113 case Primitive::kPrimDouble:
2114 locations->SetInAt(
2115 0, Location::FpuRegisterLocation(XMM0));
2116 break;
2117
2118 default:
2119 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2120 }
2121 }
2122
2123 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2124 if (kIsDebugBuild) {
2125 switch (ret->InputAt(0)->GetType()) {
2126 case Primitive::kPrimBoolean:
2127 case Primitive::kPrimByte:
2128 case Primitive::kPrimChar:
2129 case Primitive::kPrimShort:
2130 case Primitive::kPrimInt:
2131 case Primitive::kPrimNot:
2132 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2133 break;
2134
2135 case Primitive::kPrimLong:
2136 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2137 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2138 break;
2139
2140 case Primitive::kPrimFloat:
2141 case Primitive::kPrimDouble:
2142 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2143 break;
2144
2145 default:
2146 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2147 }
2148 }
2149 codegen_->GenerateFrameExit();
2150 }
2151
2152 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2153 // The trampoline uses the same calling convention as the dex calling
2154 // convention, except that arg0/r0 holds the method_idx instead of the
2155 // target Method*.
2156 HandleInvoke(invoke);
2157 }
2158
2159 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2160 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2161 }
2162
2163 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2164 // Explicit clinit checks triggered by static invokes must have been pruned by
2165 // art::PrepareForRegisterAllocation.
2166 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2167
2168 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2169 if (intrinsic.TryDispatch(invoke)) {
2170 if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
2171 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2172 }
2173 return;
2174 }
2175
2176 HandleInvoke(invoke);
2177
2178 // For PC-relative method load kinds the invoke has an extra input: the PC-relative address base.
2179 if (invoke->HasPcRelativeMethodLoadKind()) {
2180 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2181 }
2182 }
2183
2184 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2185 if (invoke->GetLocations()->Intrinsified()) {
2186 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2187 intrinsic.Dispatch(invoke);
2188 return true;
2189 }
2190 return false;
2191 }
2192
2193 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2194 // Explicit clinit checks triggered by static invokes must have been pruned by
2195 // art::PrepareForRegisterAllocation.
2196 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2197
2198 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2199 return;
2200 }
2201
2202 LocationSummary* locations = invoke->GetLocations();
2203 codegen_->GenerateStaticOrDirectCall(
2204 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2205 }
2206
2207 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2208 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2209 if (intrinsic.TryDispatch(invoke)) {
2210 return;
2211 }
2212
2213 HandleInvoke(invoke);
2214 }
2215
2216 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2217 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2218 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2219 }
2220
2221 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2222 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2223 return;
2224 }
2225
2226 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2227 DCHECK(!codegen_->IsLeafMethod());
2228 }
2229
2230 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2231 // This call to HandleInvoke allocates a temporary (core) register
2232 // which is also used to stage the hidden argument before it is
2233 // moved into the FP register (XMM7).
2234 HandleInvoke(invoke);
2235 // Add the hidden argument.
2236 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2237 }
2238
2239 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2240 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2241 LocationSummary* locations = invoke->GetLocations();
2242 Register temp = locations->GetTemp(0).AsRegister<Register>();
2243 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2244 Location receiver = locations->InAt(0);
2245 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2246
2247 // Set the hidden argument. It is safe to do so here, as XMM7
2248 // is not modified again before the `call` instruction.
2249 DCHECK_EQ(XMM7, hidden_reg);
2250 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2251 __ movd(hidden_reg, temp);
2252
2253 if (receiver.IsStackSlot()) {
2254 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2255 // /* HeapReference<Class> */ temp = temp->klass_
2256 __ movl(temp, Address(temp, class_offset));
2257 } else {
2258 // /* HeapReference<Class> */ temp = receiver->klass_
2259 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2260 }
2261 codegen_->MaybeRecordImplicitNullCheck(invoke);
2262 // Instead of simply (possibly) unpoisoning `temp` here, we should
2263 // emit a read barrier for the previous class reference load.
2264 // However this is not required in practice, as this is an
2265 // intermediate/temporary reference and because the current
2266 // concurrent copying collector keeps the from-space memory
2267 // intact/accessible until the end of the marking phase (future
2268 // collectors may not provide this guarantee).
2269 __ MaybeUnpoisonHeapReference(temp);
2270 // temp = temp->GetAddressOfIMT()
2271 __ movl(temp,
2272 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2273 // temp = temp->GetImtEntryAt(method_offset);
2274 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2275 invoke->GetImtIndex(), kX86PointerSize));
2276 __ movl(temp, Address(temp, method_offset));
2277 // call temp->GetEntryPoint();
2278 __ call(Address(temp,
2279 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2280
2281 DCHECK(!codegen_->IsLeafMethod());
2282 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2283 }
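
// Dispatch recap (illustrative; assumes ImTable::OffsetOfElement is a plain
// index * pointer-size computation): the sequence above performs
//   temp = receiver->klass_
//   temp = temp->imt_                   // ImtPtrOffset
//   temp = imt[invoke->GetImtIndex()]   // ArtMethod* or a conflict stub
//   call temp->entry_point_from_quick_compiled_code_
// with the interface method index parked in XMM7 as the hidden argument.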
2284
2285 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2286 HandleInvoke(invoke);
2287 }
2288
2289 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2290 codegen_->GenerateInvokePolymorphicCall(invoke);
2291 }
2292
2293 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2294 LocationSummary* locations =
2295 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2296 switch (neg->GetResultType()) {
2297 case Primitive::kPrimInt:
2298 case Primitive::kPrimLong:
2299 locations->SetInAt(0, Location::RequiresRegister());
2300 locations->SetOut(Location::SameAsFirstInput());
2301 break;
2302
2303 case Primitive::kPrimFloat:
2304 locations->SetInAt(0, Location::RequiresFpuRegister());
2305 locations->SetOut(Location::SameAsFirstInput());
2306 locations->AddTemp(Location::RequiresRegister());
2307 locations->AddTemp(Location::RequiresFpuRegister());
2308 break;
2309
2310 case Primitive::kPrimDouble:
2311 locations->SetInAt(0, Location::RequiresFpuRegister());
2312 locations->SetOut(Location::SameAsFirstInput());
2313 locations->AddTemp(Location::RequiresFpuRegister());
2314 break;
2315
2316 default:
2317 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2318 }
2319 }
2320
2321 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2322 LocationSummary* locations = neg->GetLocations();
2323 Location out = locations->Out();
2324 Location in = locations->InAt(0);
2325 switch (neg->GetResultType()) {
2326 case Primitive::kPrimInt:
2327 DCHECK(in.IsRegister());
2328 DCHECK(in.Equals(out));
2329 __ negl(out.AsRegister<Register>());
2330 break;
2331
2332 case Primitive::kPrimLong:
2333 DCHECK(in.IsRegisterPair());
2334 DCHECK(in.Equals(out));
2335 __ negl(out.AsRegisterPairLow<Register>());
2336 // Negation is similar to subtraction from zero. The low 32-bit word
2337 // triggers a borrow when it is different from zero; to take it into
2338 // account, add 1 to the high 32-bit word if the carry flag (CF) is
2339 // set to 1 after the first NEGL operation (worked example below).
2341 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2342 __ negl(out.AsRegisterPairHigh<Register>());
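// Worked example (illustrative): negating the long 0x00000000'00000001:
//   negl low   => low  = 0xFFFFFFFF, CF = 1
//   adcl high  => high = 0x00000001
//   negl high  => high = 0xFFFFFFFF
// which yields 0xFFFFFFFF'FFFFFFFF, i.e. -1, as expected.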
2343 break;
2344
2345 case Primitive::kPrimFloat: {
2346 DCHECK(in.Equals(out));
2347 Register constant = locations->GetTemp(0).AsRegister<Register>();
2348 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2349 // Implement float negation with an exclusive or with value
2350 // 0x80000000 (mask for bit 31, representing the sign of a
2351 // single-precision floating-point number).
2352 __ movl(constant, Immediate(INT32_C(0x80000000)));
2353 __ movd(mask, constant);
2354 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2355 break;
2356 }
2357
2358 case Primitive::kPrimDouble: {
2359 DCHECK(in.Equals(out));
2360 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2361 // Implement double negation with an exclusive or with value
2362 // 0x8000000000000000 (mask for bit 63, representing the sign of
2363 // a double-precision floating-point number).
2364 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2365 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2366 break;
2367 }
2368
2369 default:
2370 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2371 }
2372 }
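
// Sign-bit example (illustrative): xoring 1.0f (0x3F800000) with the mask
// 0x80000000 yields 0xBF800000, i.e. -1.0f. Flipping bit 31 negates every
// float, including NaNs and infinities, without touching the exponent or
// mantissa bits; the same holds for bit 63 of a double.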
2373
2374 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2375 LocationSummary* locations =
2376 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2377 DCHECK(Primitive::IsFloatingPointType(neg->GetType()));
2378 locations->SetInAt(0, Location::RequiresFpuRegister());
2379 locations->SetInAt(1, Location::RequiresRegister());
2380 locations->SetOut(Location::SameAsFirstInput());
2381 locations->AddTemp(Location::RequiresFpuRegister());
2382 }
2383
2384 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2385 LocationSummary* locations = neg->GetLocations();
2386 Location out = locations->Out();
2387 DCHECK(locations->InAt(0).Equals(out));
2388
2389 Register constant_area = locations->InAt(1).AsRegister<Register>();
2390 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2391 if (neg->GetType() == Primitive::kPrimFloat) {
2392 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2393 neg->GetBaseMethodAddress(),
2394 constant_area));
2395 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2396 } else {
2397 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2398 neg->GetBaseMethodAddress(),
2399 constant_area));
2400 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2401 }
2402 }
2403
2404 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2405 Primitive::Type result_type = conversion->GetResultType();
2406 Primitive::Type input_type = conversion->GetInputType();
2407 DCHECK_NE(result_type, input_type);
2408
2409 // The float-to-long and double-to-long type conversions rely on a
2410 // call to the runtime.
2411 LocationSummary::CallKind call_kind =
2412 ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
2413 && result_type == Primitive::kPrimLong)
2414 ? LocationSummary::kCallOnMainOnly
2415 : LocationSummary::kNoCall;
2416 LocationSummary* locations =
2417 new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
2418
2419 // The Java language does not allow treating boolean as an integral type but
2420 // our bit representation makes it safe.
2421
2422 switch (result_type) {
2423 case Primitive::kPrimByte:
2424 switch (input_type) {
2425 case Primitive::kPrimLong: {
2426 // Type conversion from long to byte is a result of code transformations.
2427 HInstruction* input = conversion->InputAt(0);
2428 Location input_location = input->IsConstant()
2429 ? Location::ConstantLocation(input->AsConstant())
2430 : Location::RegisterPairLocation(EAX, EDX);
2431 locations->SetInAt(0, input_location);
2432 // Make the output overlap to please the register allocator. This greatly simplifies
2433 // the validation of the linear scan implementation.
2434 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2435 break;
2436 }
2437 case Primitive::kPrimBoolean:
2438 // Boolean input is a result of code transformations.
2439 case Primitive::kPrimShort:
2440 case Primitive::kPrimInt:
2441 case Primitive::kPrimChar:
2442 // Processing a Dex `int-to-byte' instruction.
2443 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2444 // Make the output overlap to please the register allocator. This greatly simplifies
2445 // the validation of the linear scan implementation.
2446 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2447 break;
2448
2449 default:
2450 LOG(FATAL) << "Unexpected type conversion from " << input_type
2451 << " to " << result_type;
2452 }
2453 break;
2454
2455 case Primitive::kPrimShort:
2456 switch (input_type) {
2457 case Primitive::kPrimLong:
2458 // Type conversion from long to short is a result of code transformations.
2459 case Primitive::kPrimBoolean:
2460 // Boolean input is a result of code transformations.
2461 case Primitive::kPrimByte:
2462 case Primitive::kPrimInt:
2463 case Primitive::kPrimChar:
2464 // Processing a Dex `int-to-short' instruction.
2465 locations->SetInAt(0, Location::Any());
2466 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2467 break;
2468
2469 default:
2470 LOG(FATAL) << "Unexpected type conversion from " << input_type
2471 << " to " << result_type;
2472 }
2473 break;
2474
2475 case Primitive::kPrimInt:
2476 switch (input_type) {
2477 case Primitive::kPrimLong:
2478 // Processing a Dex `long-to-int' instruction.
2479 locations->SetInAt(0, Location::Any());
2480 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2481 break;
2482
2483 case Primitive::kPrimFloat:
2484 // Processing a Dex `float-to-int' instruction.
2485 locations->SetInAt(0, Location::RequiresFpuRegister());
2486 locations->SetOut(Location::RequiresRegister());
2487 locations->AddTemp(Location::RequiresFpuRegister());
2488 break;
2489
2490 case Primitive::kPrimDouble:
2491 // Processing a Dex `double-to-int' instruction.
2492 locations->SetInAt(0, Location::RequiresFpuRegister());
2493 locations->SetOut(Location::RequiresRegister());
2494 locations->AddTemp(Location::RequiresFpuRegister());
2495 break;
2496
2497 default:
2498 LOG(FATAL) << "Unexpected type conversion from " << input_type
2499 << " to " << result_type;
2500 }
2501 break;
2502
2503 case Primitive::kPrimLong:
2504 switch (input_type) {
2505 case Primitive::kPrimBoolean:
2506 // Boolean input is a result of code transformations.
2507 case Primitive::kPrimByte:
2508 case Primitive::kPrimShort:
2509 case Primitive::kPrimInt:
2510 case Primitive::kPrimChar:
2511 // Processing a Dex `int-to-long' instruction.
2512 locations->SetInAt(0, Location::RegisterLocation(EAX));
2513 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2514 break;
2515
2516 case Primitive::kPrimFloat:
2517 case Primitive::kPrimDouble: {
2518 // Processing a Dex `float-to-long' or `double-to-long' instruction.
2519 InvokeRuntimeCallingConvention calling_convention;
2520 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2521 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2522
2523 // The runtime helper puts the result in EAX, EDX.
2524 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2525 }
2526 break;
2527
2528 default:
2529 LOG(FATAL) << "Unexpected type conversion from " << input_type
2530 << " to " << result_type;
2531 }
2532 break;
2533
2534 case Primitive::kPrimChar:
2535 switch (input_type) {
2536 case Primitive::kPrimLong:
2537 // Type conversion from long to char is a result of code transformations.
2538 case Primitive::kPrimBoolean:
2539 // Boolean input is a result of code transformations.
2540 case Primitive::kPrimByte:
2541 case Primitive::kPrimShort:
2542 case Primitive::kPrimInt:
2543 // Processing a Dex `int-to-char' instruction.
2544 locations->SetInAt(0, Location::Any());
2545 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2546 break;
2547
2548 default:
2549 LOG(FATAL) << "Unexpected type conversion from " << input_type
2550 << " to " << result_type;
2551 }
2552 break;
2553
2554 case Primitive::kPrimFloat:
2555 switch (input_type) {
2556 case Primitive::kPrimBoolean:
2557 // Boolean input is a result of code transformations.
2558 case Primitive::kPrimByte:
2559 case Primitive::kPrimShort:
2560 case Primitive::kPrimInt:
2561 case Primitive::kPrimChar:
2562 // Processing a Dex `int-to-float' instruction.
2563 locations->SetInAt(0, Location::RequiresRegister());
2564 locations->SetOut(Location::RequiresFpuRegister());
2565 break;
2566
2567 case Primitive::kPrimLong:
2568 // Processing a Dex `long-to-float' instruction.
2569 locations->SetInAt(0, Location::Any());
2570 locations->SetOut(Location::Any());
2571 break;
2572
2573 case Primitive::kPrimDouble:
2574 // Processing a Dex `double-to-float' instruction.
2575 locations->SetInAt(0, Location::RequiresFpuRegister());
2576 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2577 break;
2578
2579 default:
2580 LOG(FATAL) << "Unexpected type conversion from " << input_type
2581 << " to " << result_type;
2582 }
2583 break;
2584
2585 case Primitive::kPrimDouble:
2586 switch (input_type) {
2587 case Primitive::kPrimBoolean:
2588 // Boolean input is a result of code transformations.
2589 case Primitive::kPrimByte:
2590 case Primitive::kPrimShort:
2591 case Primitive::kPrimInt:
2592 case Primitive::kPrimChar:
2593 // Processing a Dex `int-to-double' instruction.
2594 locations->SetInAt(0, Location::RequiresRegister());
2595 locations->SetOut(Location::RequiresFpuRegister());
2596 break;
2597
2598 case Primitive::kPrimLong:
2599 // Processing a Dex `long-to-double' instruction.
2600 locations->SetInAt(0, Location::Any());
2601 locations->SetOut(Location::Any());
2602 break;
2603
2604 case Primitive::kPrimFloat:
2605 // Processing a Dex `float-to-double' instruction.
2606 locations->SetInAt(0, Location::RequiresFpuRegister());
2607 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2608 break;
2609
2610 default:
2611 LOG(FATAL) << "Unexpected type conversion from " << input_type
2612 << " to " << result_type;
2613 }
2614 break;
2615
2616 default:
2617 LOG(FATAL) << "Unexpected type conversion from " << input_type
2618 << " to " << result_type;
2619 }
2620 }
2621
2622 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2623 LocationSummary* locations = conversion->GetLocations();
2624 Location out = locations->Out();
2625 Location in = locations->InAt(0);
2626 Primitive::Type result_type = conversion->GetResultType();
2627 Primitive::Type input_type = conversion->GetInputType();
2628 DCHECK_NE(result_type, input_type);
2629 switch (result_type) {
2630 case Primitive::kPrimByte:
2631 switch (input_type) {
2632 case Primitive::kPrimLong:
2633 // Type conversion from long to byte is a result of code transformations.
2634 if (in.IsRegisterPair()) {
2635 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2636 } else {
2637 DCHECK(in.GetConstant()->IsLongConstant());
2638 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2639 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2640 }
2641 break;
2642 case Primitive::kPrimBoolean:
2643 // Boolean input is a result of code transformations.
2644 case Primitive::kPrimShort:
2645 case Primitive::kPrimInt:
2646 case Primitive::kPrimChar:
2647 // Processing a Dex `int-to-byte' instruction.
2648 if (in.IsRegister()) {
2649 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2650 } else {
2651 DCHECK(in.GetConstant()->IsIntConstant());
2652 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2653 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2654 }
2655 break;
2656
2657 default:
2658 LOG(FATAL) << "Unexpected type conversion from " << input_type
2659 << " to " << result_type;
2660 }
2661 break;
2662
2663 case Primitive::kPrimShort:
2664 switch (input_type) {
2665 case Primitive::kPrimLong:
2666 // Type conversion from long to short is a result of code transformations.
2667 if (in.IsRegisterPair()) {
2668 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2669 } else if (in.IsDoubleStackSlot()) {
2670 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2671 } else {
2672 DCHECK(in.GetConstant()->IsLongConstant());
2673 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2674 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2675 }
2676 break;
2677 case Primitive::kPrimBoolean:
2678 // Boolean input is a result of code transformations.
2679 case Primitive::kPrimByte:
2680 case Primitive::kPrimInt:
2681 case Primitive::kPrimChar:
2682 // Processing a Dex `int-to-short' instruction.
2683 if (in.IsRegister()) {
2684 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2685 } else if (in.IsStackSlot()) {
2686 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2687 } else {
2688 DCHECK(in.GetConstant()->IsIntConstant());
2689 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2690 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2691 }
2692 break;
2693
2694 default:
2695 LOG(FATAL) << "Unexpected type conversion from " << input_type
2696 << " to " << result_type;
2697 }
2698 break;
2699
2700 case Primitive::kPrimInt:
2701 switch (input_type) {
2702 case Primitive::kPrimLong:
2703 // Processing a Dex `long-to-int' instruction.
2704 if (in.IsRegisterPair()) {
2705 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2706 } else if (in.IsDoubleStackSlot()) {
2707 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2708 } else {
2709 DCHECK(in.IsConstant());
2710 DCHECK(in.GetConstant()->IsLongConstant());
2711 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2712 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2713 }
2714 break;
2715
2716 case Primitive::kPrimFloat: {
2717 // Processing a Dex `float-to-int' instruction.
2718 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2719 Register output = out.AsRegister<Register>();
2720 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2721 NearLabel done, nan;
2722
2723 __ movl(output, Immediate(kPrimIntMax));
2724 // temp = int-to-float(output)
2725 __ cvtsi2ss(temp, output);
2726 // if input >= temp goto done
2727 __ comiss(input, temp);
2728 __ j(kAboveEqual, &done);
2729 // if input == NaN goto nan
2730 __ j(kUnordered, &nan);
2731 // output = float-to-int-truncate(input)
2732 __ cvttss2si(output, input);
2733 __ jmp(&done);
2734 __ Bind(&nan);
2735 // output = 0
2736 __ xorl(output, output);
2737 __ Bind(&done);
2738 break;
2739 }
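
// Behaviour sketch (illustrative) matching Java's narrowing (int) cast:
//   input >= kPrimIntMax (as float) => kPrimIntMax  (pre-loaded above)
//   input is NaN                    => 0            (the nan label)
//   otherwise                       => cvttss2si    (truncation)
// Negative overflow needs no special case: cvttss2si yields 0x80000000,
// which is already Integer.MIN_VALUE. The double-to-int case below is
// analogous, using cvttsd2si.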
2740
2741 case Primitive::kPrimDouble: {
2742 // Processing a Dex `double-to-int' instruction.
2743 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2744 Register output = out.AsRegister<Register>();
2745 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2746 NearLabel done, nan;
2747
2748 __ movl(output, Immediate(kPrimIntMax));
2749 // temp = int-to-double(output)
2750 __ cvtsi2sd(temp, output);
2751 // if input >= temp goto done
2752 __ comisd(input, temp);
2753 __ j(kAboveEqual, &done);
2754 // if input == NaN goto nan
2755 __ j(kUnordered, &nan);
2756 // output = double-to-int-truncate(input)
2757 __ cvttsd2si(output, input);
2758 __ jmp(&done);
2759 __ Bind(&nan);
2760 // output = 0
2761 __ xorl(output, output);
2762 __ Bind(&done);
2763 break;
2764 }
2765
2766 default:
2767 LOG(FATAL) << "Unexpected type conversion from " << input_type
2768 << " to " << result_type;
2769 }
2770 break;
2771
2772 case Primitive::kPrimLong:
2773 switch (input_type) {
2774 case Primitive::kPrimBoolean:
2775 // Boolean input is a result of code transformations.
2776 case Primitive::kPrimByte:
2777 case Primitive::kPrimShort:
2778 case Primitive::kPrimInt:
2779 case Primitive::kPrimChar:
2780 // Processing a Dex `int-to-long' instruction.
2781 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2782 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2783 DCHECK_EQ(in.AsRegister<Register>(), EAX);
2784 __ cdq();
2785 break;
2786
2787 case Primitive::kPrimFloat:
2788 // Processing a Dex `float-to-long' instruction.
2789 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2790 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2791 break;
2792
2793 case Primitive::kPrimDouble:
2794 // Processing a Dex `double-to-long' instruction.
2795 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2796 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2797 break;
2798
2799 default:
2800 LOG(FATAL) << "Unexpected type conversion from " << input_type
2801 << " to " << result_type;
2802 }
2803 break;
2804
2805 case Primitive::kPrimChar:
2806 switch (input_type) {
2807 case Primitive::kPrimLong:
2808           // Type conversion from long to char is a result of code transformations.
2809 if (in.IsRegisterPair()) {
2810 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2811 } else if (in.IsDoubleStackSlot()) {
2812 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2813 } else {
2814 DCHECK(in.GetConstant()->IsLongConstant());
2815 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2816 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2817 }
2818 break;
2819 case Primitive::kPrimBoolean:
2820 // Boolean input is a result of code transformations.
2821 case Primitive::kPrimByte:
2822 case Primitive::kPrimShort:
2823 case Primitive::kPrimInt:
2824           // Processing a Dex `int-to-char' instruction.
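          // Illustration (not generated code): movzxw zero-extends the low
          // 16 bits, so an input of 0x12345678 yields 0x00005678.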
2825 if (in.IsRegister()) {
2826 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2827 } else if (in.IsStackSlot()) {
2828 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2829 } else {
2830 DCHECK(in.GetConstant()->IsIntConstant());
2831 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2832 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2833 }
2834 break;
2835
2836 default:
2837 LOG(FATAL) << "Unexpected type conversion from " << input_type
2838 << " to " << result_type;
2839 }
2840 break;
2841
2842 case Primitive::kPrimFloat:
2843 switch (input_type) {
2844 case Primitive::kPrimBoolean:
2845 // Boolean input is a result of code transformations.
2846 case Primitive::kPrimByte:
2847 case Primitive::kPrimShort:
2848 case Primitive::kPrimInt:
2849 case Primitive::kPrimChar:
2850 // Processing a Dex `int-to-float' instruction.
2851 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2852 break;
2853
2854 case Primitive::kPrimLong: {
2855 // Processing a Dex `long-to-float' instruction.
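          // 32-bit SSE has no int64-to-float conversion, so the value goes
          // through the x87 stack instead: fildl loads the 64-bit integer and
          // fstps rounds it to single precision on the store. (Explanatory note.)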
2856 size_t adjustment = 0;
2857
2858 // Create stack space for the call to
2859 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2860 // TODO: enhance register allocator to ask for stack temporaries.
2861 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2862 adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
2863 __ subl(ESP, Immediate(adjustment));
2864 }
2865
2866 // Load the value to the FP stack, using temporaries if needed.
2867 PushOntoFPStack(in, 0, adjustment, false, true);
2868
2869 if (out.IsStackSlot()) {
2870 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2871 } else {
2872 __ fstps(Address(ESP, 0));
2873 Location stack_temp = Location::StackSlot(0);
2874 codegen_->Move32(out, stack_temp);
2875 }
2876
2877 // Remove the temporary stack space we allocated.
2878 if (adjustment != 0) {
2879 __ addl(ESP, Immediate(adjustment));
2880 }
2881 break;
2882 }
2883
2884 case Primitive::kPrimDouble:
2885 // Processing a Dex `double-to-float' instruction.
2886 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2887 break;
2888
2889 default:
2890 LOG(FATAL) << "Unexpected type conversion from " << input_type
2891 << " to " << result_type;
2892       }
2893 break;
2894
2895 case Primitive::kPrimDouble:
2896 switch (input_type) {
2897 case Primitive::kPrimBoolean:
2898 // Boolean input is a result of code transformations.
2899 case Primitive::kPrimByte:
2900 case Primitive::kPrimShort:
2901 case Primitive::kPrimInt:
2902 case Primitive::kPrimChar:
2903 // Processing a Dex `int-to-double' instruction.
2904 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2905 break;
2906
2907 case Primitive::kPrimLong: {
2908 // Processing a Dex `long-to-double' instruction.
2909 size_t adjustment = 0;
2910
2911 // Create stack space for the call to
2912 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
2913 // TODO: enhance register allocator to ask for stack temporaries.
2914 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
2915 adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
2916 __ subl(ESP, Immediate(adjustment));
2917 }
2918
2919 // Load the value to the FP stack, using temporaries if needed.
2920 PushOntoFPStack(in, 0, adjustment, false, true);
2921
2922 if (out.IsDoubleStackSlot()) {
2923 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
2924 } else {
2925 __ fstpl(Address(ESP, 0));
2926 Location stack_temp = Location::DoubleStackSlot(0);
2927 codegen_->Move64(out, stack_temp);
2928 }
2929
2930 // Remove the temporary stack space we allocated.
2931 if (adjustment != 0) {
2932 __ addl(ESP, Immediate(adjustment));
2933 }
2934 break;
2935 }
2936
2937 case Primitive::kPrimFloat:
2938 // Processing a Dex `float-to-double' instruction.
2939 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2940 break;
2941
2942 default:
2943 LOG(FATAL) << "Unexpected type conversion from " << input_type
2944 << " to " << result_type;
2945       }
2946 break;
2947
2948 default:
2949 LOG(FATAL) << "Unexpected type conversion from " << input_type
2950 << " to " << result_type;
2951 }
2952 }
2953
2954 void LocationsBuilderX86::VisitAdd(HAdd* add) {
2955 LocationSummary* locations =
2956 new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2957 switch (add->GetResultType()) {
2958 case Primitive::kPrimInt: {
2959 locations->SetInAt(0, Location::RequiresRegister());
2960 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2961 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2962 break;
2963 }
2964
2965 case Primitive::kPrimLong: {
2966 locations->SetInAt(0, Location::RequiresRegister());
2967 locations->SetInAt(1, Location::Any());
2968 locations->SetOut(Location::SameAsFirstInput());
2969 break;
2970 }
2971
2972 case Primitive::kPrimFloat:
2973 case Primitive::kPrimDouble: {
2974 locations->SetInAt(0, Location::RequiresFpuRegister());
2975 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
2976 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
2977 } else if (add->InputAt(1)->IsConstant()) {
2978 locations->SetInAt(1, Location::RequiresFpuRegister());
2979 } else {
2980 locations->SetInAt(1, Location::Any());
2981 }
2982 locations->SetOut(Location::SameAsFirstInput());
2983 break;
2984 }
2985
2986 default:
2987 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2988 break;
2989 }
2990 }
2991
2992 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
2993 LocationSummary* locations = add->GetLocations();
2994 Location first = locations->InAt(0);
2995 Location second = locations->InAt(1);
2996 Location out = locations->Out();
2997
2998 switch (add->GetResultType()) {
2999 case Primitive::kPrimInt: {
3000 if (second.IsRegister()) {
3001 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3002 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3003 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3004 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3005 } else {
3006 __ leal(out.AsRegister<Register>(), Address(
3007 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3008 }
3009 } else if (second.IsConstant()) {
3010 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3011 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3012 __ addl(out.AsRegister<Register>(), Immediate(value));
3013 } else {
3014 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3015 }
3016 } else {
3017 DCHECK(first.Equals(locations->Out()));
3018 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3019 }
3020 break;
3021 }
3022
3023 case Primitive::kPrimLong: {
3024 if (second.IsRegisterPair()) {
3025 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3026 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3027 } else if (second.IsDoubleStackSlot()) {
3028 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3029 __ adcl(first.AsRegisterPairHigh<Register>(),
3030 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3031 } else {
3032 DCHECK(second.IsConstant()) << second;
3033 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3034 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3035 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3036 }
3037 break;
3038 }
3039
3040 case Primitive::kPrimFloat: {
3041 if (second.IsFpuRegister()) {
3042 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3043 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3044 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3045 DCHECK(const_area->IsEmittedAtUseSite());
3046 __ addss(first.AsFpuRegister<XmmRegister>(),
3047 codegen_->LiteralFloatAddress(
3048 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3049 const_area->GetBaseMethodAddress(),
3050 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3051 } else {
3052 DCHECK(second.IsStackSlot());
3053 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3054 }
3055 break;
3056 }
3057
3058 case Primitive::kPrimDouble: {
3059 if (second.IsFpuRegister()) {
3060 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3061 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3062 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3063 DCHECK(const_area->IsEmittedAtUseSite());
3064 __ addsd(first.AsFpuRegister<XmmRegister>(),
3065 codegen_->LiteralDoubleAddress(
3066 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3067 const_area->GetBaseMethodAddress(),
3068 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3069 } else {
3070 DCHECK(second.IsDoubleStackSlot());
3071 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3072 }
3073 break;
3074 }
3075
3076 default:
3077 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3078 }
3079 }
3080
3081 void LocationsBuilderX86::VisitSub(HSub* sub) {
3082 LocationSummary* locations =
3083 new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
3084 switch (sub->GetResultType()) {
3085 case Primitive::kPrimInt:
3086 case Primitive::kPrimLong: {
3087 locations->SetInAt(0, Location::RequiresRegister());
3088 locations->SetInAt(1, Location::Any());
3089 locations->SetOut(Location::SameAsFirstInput());
3090 break;
3091 }
3092 case Primitive::kPrimFloat:
3093 case Primitive::kPrimDouble: {
3094 locations->SetInAt(0, Location::RequiresFpuRegister());
3095 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3096 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3097 } else if (sub->InputAt(1)->IsConstant()) {
3098 locations->SetInAt(1, Location::RequiresFpuRegister());
3099 } else {
3100 locations->SetInAt(1, Location::Any());
3101 }
3102 locations->SetOut(Location::SameAsFirstInput());
3103 break;
3104 }
3105
3106 default:
3107 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3108 }
3109 }
3110
3111 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3112 LocationSummary* locations = sub->GetLocations();
3113 Location first = locations->InAt(0);
3114 Location second = locations->InAt(1);
3115 DCHECK(first.Equals(locations->Out()));
3116 switch (sub->GetResultType()) {
3117 case Primitive::kPrimInt: {
3118 if (second.IsRegister()) {
3119 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3120 } else if (second.IsConstant()) {
3121 __ subl(first.AsRegister<Register>(),
3122 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3123 } else {
3124 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3125 }
3126 break;
3127 }
3128
3129 case Primitive::kPrimLong: {
3130 if (second.IsRegisterPair()) {
3131 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3132 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3133 } else if (second.IsDoubleStackSlot()) {
3134 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3135 __ sbbl(first.AsRegisterPairHigh<Register>(),
3136 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3137 } else {
3138 DCHECK(second.IsConstant()) << second;
3139 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3140 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3141 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3142 }
3143 break;
3144 }
3145
3146 case Primitive::kPrimFloat: {
3147 if (second.IsFpuRegister()) {
3148 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3149 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3150 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3151 DCHECK(const_area->IsEmittedAtUseSite());
3152 __ subss(first.AsFpuRegister<XmmRegister>(),
3153 codegen_->LiteralFloatAddress(
3154 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3155 const_area->GetBaseMethodAddress(),
3156 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3157 } else {
3158 DCHECK(second.IsStackSlot());
3159 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3160 }
3161 break;
3162 }
3163
3164 case Primitive::kPrimDouble: {
3165 if (second.IsFpuRegister()) {
3166 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3167 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3168 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3169 DCHECK(const_area->IsEmittedAtUseSite());
3170 __ subsd(first.AsFpuRegister<XmmRegister>(),
3171 codegen_->LiteralDoubleAddress(
3172 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3173 const_area->GetBaseMethodAddress(),
3174 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3175 } else {
3176 DCHECK(second.IsDoubleStackSlot());
3177 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3178 }
3179 break;
3180 }
3181
3182 default:
3183 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3184 }
3185 }
3186
3187 void LocationsBuilderX86::VisitMul(HMul* mul) {
3188 LocationSummary* locations =
3189 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3190 switch (mul->GetResultType()) {
3191 case Primitive::kPrimInt:
3192 locations->SetInAt(0, Location::RequiresRegister());
3193 locations->SetInAt(1, Location::Any());
3194 if (mul->InputAt(1)->IsIntConstant()) {
3195 // Can use 3 operand multiply.
3196 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3197 } else {
3198 locations->SetOut(Location::SameAsFirstInput());
3199 }
3200 break;
3201 case Primitive::kPrimLong: {
3202 locations->SetInAt(0, Location::RequiresRegister());
3203 locations->SetInAt(1, Location::Any());
3204 locations->SetOut(Location::SameAsFirstInput());
3205       // Needed for the 32-bit multiplies (mull/imull) that produce a 64-bit result in EDX:EAX.
3206 locations->AddTemp(Location::RegisterLocation(EAX));
3207 locations->AddTemp(Location::RegisterLocation(EDX));
3208 break;
3209 }
3210 case Primitive::kPrimFloat:
3211 case Primitive::kPrimDouble: {
3212 locations->SetInAt(0, Location::RequiresFpuRegister());
3213 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3214 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3215 } else if (mul->InputAt(1)->IsConstant()) {
3216 locations->SetInAt(1, Location::RequiresFpuRegister());
3217 } else {
3218 locations->SetInAt(1, Location::Any());
3219 }
3220 locations->SetOut(Location::SameAsFirstInput());
3221 break;
3222 }
3223
3224 default:
3225 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3226 }
3227 }
3228
3229 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3230 LocationSummary* locations = mul->GetLocations();
3231 Location first = locations->InAt(0);
3232 Location second = locations->InAt(1);
3233 Location out = locations->Out();
3234
3235 switch (mul->GetResultType()) {
3236 case Primitive::kPrimInt:
3237 // The constant may have ended up in a register, so test explicitly to avoid
3238 // problems where the output may not be the same as the first operand.
3239 if (mul->InputAt(1)->IsIntConstant()) {
3240 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3241 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3242 } else if (second.IsRegister()) {
3243 DCHECK(first.Equals(out));
3244 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3245 } else {
3246 DCHECK(second.IsStackSlot());
3247 DCHECK(first.Equals(out));
3248 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3249 }
3250 break;
3251
3252 case Primitive::kPrimLong: {
3253 Register in1_hi = first.AsRegisterPairHigh<Register>();
3254 Register in1_lo = first.AsRegisterPairLow<Register>();
3255 Register eax = locations->GetTemp(0).AsRegister<Register>();
3256 Register edx = locations->GetTemp(1).AsRegister<Register>();
3257
3258 DCHECK_EQ(EAX, eax);
3259 DCHECK_EQ(EDX, edx);
3260
3261 // input: in1 - 64 bits, in2 - 64 bits.
3262 // output: in1
3263       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo) * 2^32 + in1.lo * in2.lo
3264 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3265 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
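      // Worked illustration (not generated code):
      //   in1 = 0x0000000100000002, in2 = 0x0000000300000004
      //   in1.lo * in2.lo = 2 * 4 = 8, so the low word is 8 and bits [63:32] are 0;
      //   in1.lo * in2.hi + in1.hi * in2.lo = 2 * 3 + 1 * 4 = 0xA is the high word;
      //   result = 0x0000000A00000008, the full product mod 2^64.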
3266 if (second.IsConstant()) {
3267 DCHECK(second.GetConstant()->IsLongConstant());
3268
3269 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3270 int32_t low_value = Low32Bits(value);
3271 int32_t high_value = High32Bits(value);
3272 Immediate low(low_value);
3273 Immediate high(high_value);
3274
3275 __ movl(eax, high);
3276 // eax <- in1.lo * in2.hi
3277 __ imull(eax, in1_lo);
3278 // in1.hi <- in1.hi * in2.lo
3279 __ imull(in1_hi, low);
3280 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3281 __ addl(in1_hi, eax);
3282 // move in2_lo to eax to prepare for double precision
3283 __ movl(eax, low);
3284 // edx:eax <- in1.lo * in2.lo
3285 __ mull(in1_lo);
3286 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3287 __ addl(in1_hi, edx);
3288 // in1.lo <- (in1.lo * in2.lo)[31:0];
3289 __ movl(in1_lo, eax);
3290 } else if (second.IsRegisterPair()) {
3291 Register in2_hi = second.AsRegisterPairHigh<Register>();
3292 Register in2_lo = second.AsRegisterPairLow<Register>();
3293
3294 __ movl(eax, in2_hi);
3295 // eax <- in1.lo * in2.hi
3296 __ imull(eax, in1_lo);
3297 // in1.hi <- in1.hi * in2.lo
3298 __ imull(in1_hi, in2_lo);
3299 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3300 __ addl(in1_hi, eax);
3301 // move in1_lo to eax to prepare for double precision
3302 __ movl(eax, in1_lo);
3303 // edx:eax <- in1.lo * in2.lo
3304 __ mull(in2_lo);
3305 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3306 __ addl(in1_hi, edx);
3307 // in1.lo <- (in1.lo * in2.lo)[31:0];
3308 __ movl(in1_lo, eax);
3309 } else {
3310 DCHECK(second.IsDoubleStackSlot()) << second;
3311 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3312 Address in2_lo(ESP, second.GetStackIndex());
3313
3314 __ movl(eax, in2_hi);
3315 // eax <- in1.lo * in2.hi
3316 __ imull(eax, in1_lo);
3317 // in1.hi <- in1.hi * in2.lo
3318 __ imull(in1_hi, in2_lo);
3319 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3320 __ addl(in1_hi, eax);
3321 // move in1_lo to eax to prepare for double precision
3322 __ movl(eax, in1_lo);
3323 // edx:eax <- in1.lo * in2.lo
3324 __ mull(in2_lo);
3325 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3326 __ addl(in1_hi, edx);
3327 // in1.lo <- (in1.lo * in2.lo)[31:0];
3328 __ movl(in1_lo, eax);
3329 }
3330
3331 break;
3332 }
3333
3334 case Primitive::kPrimFloat: {
3335 DCHECK(first.Equals(locations->Out()));
3336 if (second.IsFpuRegister()) {
3337 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3338 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3339 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3340 DCHECK(const_area->IsEmittedAtUseSite());
3341 __ mulss(first.AsFpuRegister<XmmRegister>(),
3342 codegen_->LiteralFloatAddress(
3343 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3344 const_area->GetBaseMethodAddress(),
3345 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3346 } else {
3347 DCHECK(second.IsStackSlot());
3348 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3349 }
3350 break;
3351 }
3352
3353 case Primitive::kPrimDouble: {
3354 DCHECK(first.Equals(locations->Out()));
3355 if (second.IsFpuRegister()) {
3356 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3357 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3358 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3359 DCHECK(const_area->IsEmittedAtUseSite());
3360 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3361 codegen_->LiteralDoubleAddress(
3362 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3363 const_area->GetBaseMethodAddress(),
3364 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3365 } else {
3366 DCHECK(second.IsDoubleStackSlot());
3367 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3368 }
3369 break;
3370 }
3371
3372 default:
3373 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3374 }
3375 }
3376
3377 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3378 uint32_t temp_offset,
3379 uint32_t stack_adjustment,
3380 bool is_fp,
3381 bool is_wide) {
3382 if (source.IsStackSlot()) {
3383 DCHECK(!is_wide);
3384 if (is_fp) {
3385 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3386 } else {
3387 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3388 }
3389 } else if (source.IsDoubleStackSlot()) {
3390 DCHECK(is_wide);
3391 if (is_fp) {
3392 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3393 } else {
3394 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3395 }
3396 } else {
3397 // Write the value to the temporary location on the stack and load to FP stack.
3398 if (!is_wide) {
3399 Location stack_temp = Location::StackSlot(temp_offset);
3400 codegen_->Move32(stack_temp, source);
3401 if (is_fp) {
3402 __ flds(Address(ESP, temp_offset));
3403 } else {
3404 __ filds(Address(ESP, temp_offset));
3405 }
3406 } else {
3407 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3408 codegen_->Move64(stack_temp, source);
3409 if (is_fp) {
3410 __ fldl(Address(ESP, temp_offset));
3411 } else {
3412 __ fildl(Address(ESP, temp_offset));
3413 }
3414 }
3415 }
3416 }
3417
3418 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3419 Primitive::Type type = rem->GetResultType();
3420 bool is_float = type == Primitive::kPrimFloat;
3421 size_t elem_size = Primitive::ComponentSize(type);
3422 LocationSummary* locations = rem->GetLocations();
3423 Location first = locations->InAt(0);
3424 Location second = locations->InAt(1);
3425 Location out = locations->Out();
3426
3427 // Create stack space for 2 elements.
3428 // TODO: enhance register allocator to ask for stack temporaries.
3429 __ subl(ESP, Immediate(2 * elem_size));
3430
3431 // Load the values to the FP stack in reverse order, using temporaries if needed.
3432 const bool is_wide = !is_float;
3433 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
3434 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
3435
3436 // Loop doing FPREM until we stabilize.
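  // fprem computes a partial remainder: each round reduces the exponent
  // difference by at most 63 bits, so e.g. fmod(1e30, 3.0) needs several rounds
  // before C2 clears. (Explanatory note; the count depends on the operands.)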
3437 NearLabel retry;
3438 __ Bind(&retry);
3439 __ fprem();
3440
3441 // Move FP status to AX.
3442 __ fstsw();
3443
3444 // And see if the argument reduction is complete. This is signaled by the
3445 // C2 FPU flag bit set to 0.
3446 __ andl(EAX, Immediate(kC2ConditionMask));
3447 __ j(kNotEqual, &retry);
3448
3449 // We have settled on the final value. Retrieve it into an XMM register.
3450 // Store FP top of stack to real stack.
3451 if (is_float) {
3452 __ fsts(Address(ESP, 0));
3453 } else {
3454 __ fstl(Address(ESP, 0));
3455 }
3456
3457 // Pop the 2 items from the FP stack.
3458 __ fucompp();
3459
3460 // Load the value from the stack into an XMM register.
3461 DCHECK(out.IsFpuRegister()) << out;
3462 if (is_float) {
3463 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3464 } else {
3465 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3466 }
3467
3468 // And remove the temporary stack space we allocated.
3469 __ addl(ESP, Immediate(2 * elem_size));
3470 }
3471
3472
3473 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3474 DCHECK(instruction->IsDiv() || instruction->IsRem());
3475
3476 LocationSummary* locations = instruction->GetLocations();
3477 DCHECK(locations->InAt(1).IsConstant());
3478 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3479
3480 Register out_register = locations->Out().AsRegister<Register>();
3481 Register input_register = locations->InAt(0).AsRegister<Register>();
3482 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3483
3484 DCHECK(imm == 1 || imm == -1);
3485
3486 if (instruction->IsRem()) {
3487 __ xorl(out_register, out_register);
3488 } else {
3489 __ movl(out_register, input_register);
3490 if (imm == -1) {
3491 __ negl(out_register);
3492 }
3493 }
3494 }
3495
3496
3497 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3498 LocationSummary* locations = instruction->GetLocations();
3499
3500 Register out_register = locations->Out().AsRegister<Register>();
3501 Register input_register = locations->InAt(0).AsRegister<Register>();
3502 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3503 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3504 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3505
3506 Register num = locations->GetTemp(0).AsRegister<Register>();
3507
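  // Signed division truncates toward zero, so negative inputs are biased by
  // (abs_imm - 1) before the arithmetic shift. Sketch (not generated code) for
  // imm == 4, input == -7:
  //   biased = -7 + 3 = -4;  -4 >> 2 == -1 == truncate(-7 / 4),
  // whereas a bare -7 >> 2 == -2 would round toward negative infinity.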
3508 __ leal(num, Address(input_register, abs_imm - 1));
3509 __ testl(input_register, input_register);
3510 __ cmovl(kGreaterEqual, num, input_register);
3511 int shift = CTZ(imm);
3512 __ sarl(num, Immediate(shift));
3513
3514 if (imm < 0) {
3515 __ negl(num);
3516 }
3517
3518 __ movl(out_register, num);
3519 }
3520
3521 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3522 DCHECK(instruction->IsDiv() || instruction->IsRem());
3523
3524 LocationSummary* locations = instruction->GetLocations();
3525 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3526
3527 Register eax = locations->InAt(0).AsRegister<Register>();
3528 Register out = locations->Out().AsRegister<Register>();
3529 Register num;
3530 Register edx;
3531
3532 if (instruction->IsDiv()) {
3533 edx = locations->GetTemp(0).AsRegister<Register>();
3534 num = locations->GetTemp(1).AsRegister<Register>();
3535 } else {
3536 edx = locations->Out().AsRegister<Register>();
3537 num = locations->GetTemp(0).AsRegister<Register>();
3538 }
3539
3540 DCHECK_EQ(EAX, eax);
3541 DCHECK_EQ(EDX, edx);
3542 if (instruction->IsDiv()) {
3543 DCHECK_EQ(EAX, out);
3544 } else {
3545 DCHECK_EQ(EDX, out);
3546 }
3547
3548 int64_t magic;
3549 int shift;
3550 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
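  // This is the Granlund-Montgomery / Hacker's Delight scheme: divide with a
  // 32x32->64 signed multiply plus shifts. Hedged illustration (constants are
  // an assumption, not read from this code): for imm == 7 a valid pair is
  // magic == 0x92492493 (negative as int32) and shift == 2. With numerator 100:
  //   high 32 bits of magic * 100 == -43;  -43 + 100 == 57 (magic < 0, imm > 0);
  //   57 >> 2 == 14 == 100 / 7, and the sign fixup below then adds 0.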
3551
3552 // Save the numerator.
3553 __ movl(num, eax);
3554
3555 // EAX = magic
3556 __ movl(eax, Immediate(magic));
3557
3558 // EDX:EAX = magic * numerator
3559 __ imull(num);
3560
3561 if (imm > 0 && magic < 0) {
3562 // EDX += num
3563 __ addl(edx, num);
3564 } else if (imm < 0 && magic > 0) {
3565 __ subl(edx, num);
3566 }
3567
3568 // Shift if needed.
3569 if (shift != 0) {
3570 __ sarl(edx, Immediate(shift));
3571 }
3572
3573 // EDX += 1 if EDX < 0
3574 __ movl(eax, edx);
3575 __ shrl(edx, Immediate(31));
3576 __ addl(edx, eax);
3577
3578 if (instruction->IsRem()) {
3579 __ movl(eax, num);
3580 __ imull(edx, Immediate(imm));
3581 __ subl(eax, edx);
3582 __ movl(edx, eax);
3583 } else {
3584 __ movl(eax, edx);
3585 }
3586 }
3587
3588 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3589 DCHECK(instruction->IsDiv() || instruction->IsRem());
3590
3591 LocationSummary* locations = instruction->GetLocations();
3592 Location out = locations->Out();
3593 Location first = locations->InAt(0);
3594 Location second = locations->InAt(1);
3595 bool is_div = instruction->IsDiv();
3596
3597 switch (instruction->GetResultType()) {
3598 case Primitive::kPrimInt: {
3599 DCHECK_EQ(EAX, first.AsRegister<Register>());
3600 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3601
3602 if (second.IsConstant()) {
3603 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3604
3605 if (imm == 0) {
3606           // Do not generate anything for 0: the preceding DivZeroCheck always throws, so this code is unreachable.
3607 } else if (imm == 1 || imm == -1) {
3608 DivRemOneOrMinusOne(instruction);
3609 } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
3610 DivByPowerOfTwo(instruction->AsDiv());
3611 } else {
3612 DCHECK(imm <= -2 || imm >= 2);
3613 GenerateDivRemWithAnyConstant(instruction);
3614 }
3615 } else {
3616 SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(
3617 instruction, out.AsRegister<Register>(), is_div);
3618 codegen_->AddSlowPath(slow_path);
3619
3620 Register second_reg = second.AsRegister<Register>();
3621 // 0x80000000/-1 triggers an arithmetic exception!
3622         // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so
3623 // it's safe to just use negl instead of more complex comparisons.
3624
3625 __ cmpl(second_reg, Immediate(-1));
3626 __ j(kEqual, slow_path->GetEntryLabel());
3627
3628 // edx:eax <- sign-extended of eax
3629 __ cdq();
3630 // eax = quotient, edx = remainder
3631 __ idivl(second_reg);
3632 __ Bind(slow_path->GetExitLabel());
3633 }
3634 break;
3635 }
3636
3637 case Primitive::kPrimLong: {
3638 InvokeRuntimeCallingConvention calling_convention;
3639 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3640 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3641 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3642 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3643 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3644 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3645
3646 if (is_div) {
3647 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3648 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3649 } else {
3650 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3651 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3652 }
3653 break;
3654 }
3655
3656 default:
3657 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3658 }
3659 }
3660
3661 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3662 LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
3663 ? LocationSummary::kCallOnMainOnly
3664 : LocationSummary::kNoCall;
3665 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
3666
3667 switch (div->GetResultType()) {
3668 case Primitive::kPrimInt: {
3669 locations->SetInAt(0, Location::RegisterLocation(EAX));
3670 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3671 locations->SetOut(Location::SameAsFirstInput());
3672 // Intel uses edx:eax as the dividend.
3673 locations->AddTemp(Location::RegisterLocation(EDX));
3674 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3675 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3676 // output and request another temp.
3677 if (div->InputAt(1)->IsIntConstant()) {
3678 locations->AddTemp(Location::RequiresRegister());
3679 }
3680 break;
3681 }
3682 case Primitive::kPrimLong: {
3683 InvokeRuntimeCallingConvention calling_convention;
3684 locations->SetInAt(0, Location::RegisterPairLocation(
3685 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3686 locations->SetInAt(1, Location::RegisterPairLocation(
3687 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3688 // Runtime helper puts the result in EAX, EDX.
3689 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3690 break;
3691 }
3692 case Primitive::kPrimFloat:
3693 case Primitive::kPrimDouble: {
3694 locations->SetInAt(0, Location::RequiresFpuRegister());
3695 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3696 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3697 } else if (div->InputAt(1)->IsConstant()) {
3698 locations->SetInAt(1, Location::RequiresFpuRegister());
3699 } else {
3700 locations->SetInAt(1, Location::Any());
3701 }
3702 locations->SetOut(Location::SameAsFirstInput());
3703 break;
3704 }
3705
3706 default:
3707 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3708 }
3709 }
3710
3711 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3712 LocationSummary* locations = div->GetLocations();
3713 Location first = locations->InAt(0);
3714 Location second = locations->InAt(1);
3715
3716 switch (div->GetResultType()) {
3717 case Primitive::kPrimInt:
3718 case Primitive::kPrimLong: {
3719 GenerateDivRemIntegral(div);
3720 break;
3721 }
3722
3723 case Primitive::kPrimFloat: {
3724 if (second.IsFpuRegister()) {
3725 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3726 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3727 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3728 DCHECK(const_area->IsEmittedAtUseSite());
3729 __ divss(first.AsFpuRegister<XmmRegister>(),
3730 codegen_->LiteralFloatAddress(
3731 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3732 const_area->GetBaseMethodAddress(),
3733 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3734 } else {
3735 DCHECK(second.IsStackSlot());
3736 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3737 }
3738 break;
3739 }
3740
3741 case Primitive::kPrimDouble: {
3742 if (second.IsFpuRegister()) {
3743 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3744 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3745 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3746 DCHECK(const_area->IsEmittedAtUseSite());
3747 __ divsd(first.AsFpuRegister<XmmRegister>(),
3748 codegen_->LiteralDoubleAddress(
3749 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3750 const_area->GetBaseMethodAddress(),
3751 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3752 } else {
3753 DCHECK(second.IsDoubleStackSlot());
3754 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3755 }
3756 break;
3757 }
3758
3759 default:
3760 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3761 }
3762 }
3763
3764 void LocationsBuilderX86::VisitRem(HRem* rem) {
3765 Primitive::Type type = rem->GetResultType();
3766
3767 LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
3768 ? LocationSummary::kCallOnMainOnly
3769 : LocationSummary::kNoCall;
3770 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
3771
3772 switch (type) {
3773 case Primitive::kPrimInt: {
3774 locations->SetInAt(0, Location::RegisterLocation(EAX));
3775 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3776 locations->SetOut(Location::RegisterLocation(EDX));
3777 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3778 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3779 // output and request another temp.
3780 if (rem->InputAt(1)->IsIntConstant()) {
3781 locations->AddTemp(Location::RequiresRegister());
3782 }
3783 break;
3784 }
3785 case Primitive::kPrimLong: {
3786 InvokeRuntimeCallingConvention calling_convention;
3787 locations->SetInAt(0, Location::RegisterPairLocation(
3788 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3789 locations->SetInAt(1, Location::RegisterPairLocation(
3790 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3791 // Runtime helper puts the result in EAX, EDX.
3792 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3793 break;
3794 }
3795 case Primitive::kPrimDouble:
3796 case Primitive::kPrimFloat: {
3797 locations->SetInAt(0, Location::Any());
3798 locations->SetInAt(1, Location::Any());
3799 locations->SetOut(Location::RequiresFpuRegister());
3800 locations->AddTemp(Location::RegisterLocation(EAX));
3801 break;
3802 }
3803
3804 default:
3805 LOG(FATAL) << "Unexpected rem type " << type;
3806 }
3807 }
3808
3809 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3810 Primitive::Type type = rem->GetResultType();
3811 switch (type) {
3812 case Primitive::kPrimInt:
3813 case Primitive::kPrimLong: {
3814 GenerateDivRemIntegral(rem);
3815 break;
3816 }
3817 case Primitive::kPrimFloat:
3818 case Primitive::kPrimDouble: {
3819 GenerateRemFP(rem);
3820 break;
3821 }
3822 default:
3823 LOG(FATAL) << "Unexpected rem type " << type;
3824 }
3825 }
3826
3827 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3828 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3829 switch (instruction->GetType()) {
3830 case Primitive::kPrimBoolean:
3831 case Primitive::kPrimByte:
3832 case Primitive::kPrimChar:
3833 case Primitive::kPrimShort:
3834 case Primitive::kPrimInt: {
3835 locations->SetInAt(0, Location::Any());
3836 break;
3837 }
3838 case Primitive::kPrimLong: {
3839 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3840 if (!instruction->IsConstant()) {
3841 locations->AddTemp(Location::RequiresRegister());
3842 }
3843 break;
3844 }
3845 default:
3846 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3847 }
3848 }
3849
3850 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3851 SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86(instruction);
3852 codegen_->AddSlowPath(slow_path);
3853
3854 LocationSummary* locations = instruction->GetLocations();
3855 Location value = locations->InAt(0);
3856
3857 switch (instruction->GetType()) {
3858 case Primitive::kPrimBoolean:
3859 case Primitive::kPrimByte:
3860 case Primitive::kPrimChar:
3861 case Primitive::kPrimShort:
3862 case Primitive::kPrimInt: {
3863 if (value.IsRegister()) {
3864 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
3865 __ j(kEqual, slow_path->GetEntryLabel());
3866 } else if (value.IsStackSlot()) {
3867 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
3868 __ j(kEqual, slow_path->GetEntryLabel());
3869 } else {
3870 DCHECK(value.IsConstant()) << value;
3871 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3872 __ jmp(slow_path->GetEntryLabel());
3873 }
3874 }
3875 break;
3876 }
3877 case Primitive::kPrimLong: {
3878 if (value.IsRegisterPair()) {
3879 Register temp = locations->GetTemp(0).AsRegister<Register>();
3880 __ movl(temp, value.AsRegisterPairLow<Register>());
3881 __ orl(temp, value.AsRegisterPairHigh<Register>());
3882 __ j(kEqual, slow_path->GetEntryLabel());
3883 } else {
3884 DCHECK(value.IsConstant()) << value;
3885 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3886 __ jmp(slow_path->GetEntryLabel());
3887 }
3888 }
3889 break;
3890 }
3891 default:
3892       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3893 }
3894 }
3895
3896 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
3897 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3898
3899 LocationSummary* locations =
3900 new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3901
3902 switch (op->GetResultType()) {
3903 case Primitive::kPrimInt:
3904 case Primitive::kPrimLong: {
3905       // Can't have Location::Any() and output SameAsFirstInput().
3906 locations->SetInAt(0, Location::RequiresRegister());
3907 // The shift count needs to be in CL or a constant.
3908 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
3909 locations->SetOut(Location::SameAsFirstInput());
3910 break;
3911 }
3912 default:
3913 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3914 }
3915 }
3916
3917 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
3918 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3919
3920 LocationSummary* locations = op->GetLocations();
3921 Location first = locations->InAt(0);
3922 Location second = locations->InAt(1);
3923 DCHECK(first.Equals(locations->Out()));
3924
3925 switch (op->GetResultType()) {
3926 case Primitive::kPrimInt: {
3927 DCHECK(first.IsRegister());
3928 Register first_reg = first.AsRegister<Register>();
3929 if (second.IsRegister()) {
3930 Register second_reg = second.AsRegister<Register>();
3931 DCHECK_EQ(ECX, second_reg);
3932 if (op->IsShl()) {
3933 __ shll(first_reg, second_reg);
3934 } else if (op->IsShr()) {
3935 __ sarl(first_reg, second_reg);
3936 } else {
3937 __ shrl(first_reg, second_reg);
3938 }
3939 } else {
3940 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
3941 if (shift == 0) {
3942 return;
3943 }
3944 Immediate imm(shift);
3945 if (op->IsShl()) {
3946 __ shll(first_reg, imm);
3947 } else if (op->IsShr()) {
3948 __ sarl(first_reg, imm);
3949 } else {
3950 __ shrl(first_reg, imm);
3951 }
3952 }
3953 break;
3954 }
3955 case Primitive::kPrimLong: {
3956 if (second.IsRegister()) {
3957 Register second_reg = second.AsRegister<Register>();
3958 DCHECK_EQ(ECX, second_reg);
3959 if (op->IsShl()) {
3960 GenerateShlLong(first, second_reg);
3961 } else if (op->IsShr()) {
3962 GenerateShrLong(first, second_reg);
3963 } else {
3964 GenerateUShrLong(first, second_reg);
3965 }
3966 } else {
3967 // Shift by a constant.
3968 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
3969 // Nothing to do if the shift is 0, as the input is already the output.
3970 if (shift != 0) {
3971 if (op->IsShl()) {
3972 GenerateShlLong(first, shift);
3973 } else if (op->IsShr()) {
3974 GenerateShrLong(first, shift);
3975 } else {
3976 GenerateUShrLong(first, shift);
3977 }
3978 }
3979 }
3980 break;
3981 }
3982 default:
3983 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3984 }
3985 }
3986
3987 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
3988 Register low = loc.AsRegisterPairLow<Register>();
3989 Register high = loc.AsRegisterPairHigh<Register>();
3990 if (shift == 1) {
3991 // This is just an addition.
3992 __ addl(low, low);
3993 __ adcl(high, high);
3994 } else if (shift == 32) {
3995 // Shift by 32 is easy. High gets low, and low gets 0.
3996 codegen_->EmitParallelMoves(
3997 loc.ToLow(),
3998 loc.ToHigh(),
3999 Primitive::kPrimInt,
4000 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4001 loc.ToLow(),
4002 Primitive::kPrimInt);
4003 } else if (shift > 32) {
4004 // Low part becomes 0. High part is low part << (shift-32).
4005 __ movl(high, low);
4006 __ shll(high, Immediate(shift - 32));
4007 __ xorl(low, low);
4008 } else {
4009 // Between 1 and 31.
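    // shld shifts 'high' left, filling the vacated bits from the top of 'low'.
    // Illustration (not generated code): high:low == 0x00000001:0x80000000
    // shifted by 1 gives high == 0x00000003, and shll then clears low to 0,
    // i.e. the 64-bit value 0x180000000 << 1 == 0x300000000.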
4010 __ shld(high, low, Immediate(shift));
4011 __ shll(low, Immediate(shift));
4012 }
4013 }
4014
4015 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4016 NearLabel done;
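  // The hardware masks 32-bit shift counts to five bits, so the shld/shll below
  // only see (shifter & 31); the test against bit 32 afterwards patches up
  // counts in [32, 63] by moving the shifted low word into the high word.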
4017 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4018 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4019 __ testl(shifter, Immediate(32));
4020 __ j(kEqual, &done);
4021 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4022 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4023 __ Bind(&done);
4024 }
4025
4026 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4027 Register low = loc.AsRegisterPairLow<Register>();
4028 Register high = loc.AsRegisterPairHigh<Register>();
4029 if (shift == 32) {
4030 // Need to copy the sign.
4031 DCHECK_NE(low, high);
4032 __ movl(low, high);
4033 __ sarl(high, Immediate(31));
4034 } else if (shift > 32) {
4035 DCHECK_NE(low, high);
4036 // High part becomes sign. Low part is shifted by shift - 32.
4037 __ movl(low, high);
4038 __ sarl(high, Immediate(31));
4039 __ sarl(low, Immediate(shift - 32));
4040 } else {
4041 // Between 1 and 31.
4042 __ shrd(low, high, Immediate(shift));
4043 __ sarl(high, Immediate(shift));
4044 }
4045 }
4046
4047 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4048 NearLabel done;
4049 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4050 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4051 __ testl(shifter, Immediate(32));
4052 __ j(kEqual, &done);
4053 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4054 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4055 __ Bind(&done);
4056 }
4057
4058 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4059 Register low = loc.AsRegisterPairLow<Register>();
4060 Register high = loc.AsRegisterPairHigh<Register>();
4061 if (shift == 32) {
4062 // Shift by 32 is easy. Low gets high, and high gets 0.
4063 codegen_->EmitParallelMoves(
4064 loc.ToHigh(),
4065 loc.ToLow(),
4066 Primitive::kPrimInt,
4067 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4068 loc.ToHigh(),
4069 Primitive::kPrimInt);
4070 } else if (shift > 32) {
4071 // Low part is high >> (shift - 32). High part becomes 0.
4072 __ movl(low, high);
4073 __ shrl(low, Immediate(shift - 32));
4074 __ xorl(high, high);
4075 } else {
4076 // Between 1 and 31.
4077 __ shrd(low, high, Immediate(shift));
4078 __ shrl(high, Immediate(shift));
4079 }
4080 }
4081
4082 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4083 NearLabel done;
4084 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4085 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4086 __ testl(shifter, Immediate(32));
4087 __ j(kEqual, &done);
4088 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4089 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4090 __ Bind(&done);
4091 }
4092
4093 void LocationsBuilderX86::VisitRor(HRor* ror) {
4094 LocationSummary* locations =
4095 new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
4096
4097 switch (ror->GetResultType()) {
4098 case Primitive::kPrimLong:
4099 // Add the temporary needed.
4100 locations->AddTemp(Location::RequiresRegister());
4101 FALLTHROUGH_INTENDED;
4102 case Primitive::kPrimInt:
4103 locations->SetInAt(0, Location::RequiresRegister());
4104 // The shift count needs to be in CL (unless it is a constant).
4105 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4106 locations->SetOut(Location::SameAsFirstInput());
4107 break;
4108 default:
4109 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4110 UNREACHABLE();
4111 }
4112 }
4113
4114 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4115 LocationSummary* locations = ror->GetLocations();
4116 Location first = locations->InAt(0);
4117 Location second = locations->InAt(1);
4118
4119 if (ror->GetResultType() == Primitive::kPrimInt) {
4120 Register first_reg = first.AsRegister<Register>();
4121 if (second.IsRegister()) {
4122 Register second_reg = second.AsRegister<Register>();
4123 __ rorl(first_reg, second_reg);
4124 } else {
4125 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4126 __ rorl(first_reg, imm);
4127 }
4128 return;
4129 }
4130
4131 DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
4132 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4133 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4134 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4135 if (second.IsRegister()) {
4136 Register second_reg = second.AsRegister<Register>();
4137 DCHECK_EQ(second_reg, ECX);
4138 __ movl(temp_reg, first_reg_hi);
4139 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4140 __ shrd(first_reg_lo, temp_reg, second_reg);
4141 __ movl(temp_reg, first_reg_hi);
4142 __ testl(second_reg, Immediate(32));
4143 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4144 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4145 } else {
4146 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4147 if (shift_amt == 0) {
4148 // Already fine.
4149 return;
4150 }
4151 if (shift_amt == 32) {
4152 // Just swap.
4153 __ movl(temp_reg, first_reg_lo);
4154 __ movl(first_reg_lo, first_reg_hi);
4155 __ movl(first_reg_hi, temp_reg);
4156 return;
4157 }
4158
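    // Two shrd ops implement the 64-bit rotate. Illustration (not generated
    // code): for the value 1 (high == 0, low == 1), rotating right by 1 gives
    // low == 0 and high == 0x80000000: the bit wraps from the bottom of the
    // low word into the top of the high word.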
4159 Immediate imm(shift_amt);
4160     // Save the contents of the low value.
4161 __ movl(temp_reg, first_reg_lo);
4162
4163 // Shift right into low, feeding bits from high.
4164 __ shrd(first_reg_lo, first_reg_hi, imm);
4165
4166 // Shift right into high, feeding bits from the original low.
4167 __ shrd(first_reg_hi, temp_reg, imm);
4168
4169 // Swap if needed.
4170 if (shift_amt > 32) {
4171 __ movl(temp_reg, first_reg_lo);
4172 __ movl(first_reg_lo, first_reg_hi);
4173 __ movl(first_reg_hi, temp_reg);
4174 }
4175 }
4176 }
4177
4178 void LocationsBuilderX86::VisitShl(HShl* shl) {
4179 HandleShift(shl);
4180 }
4181
4182 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4183 HandleShift(shl);
4184 }
4185
4186 void LocationsBuilderX86::VisitShr(HShr* shr) {
4187 HandleShift(shr);
4188 }
4189
4190 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4191 HandleShift(shr);
4192 }
4193
4194 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4195 HandleShift(ushr);
4196 }
4197
4198 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4199 HandleShift(ushr);
4200 }
4201
4202 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4203 LocationSummary* locations =
4204 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4205 locations->SetOut(Location::RegisterLocation(EAX));
4206 if (instruction->IsStringAlloc()) {
4207 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4208 } else {
4209 InvokeRuntimeCallingConvention calling_convention;
4210 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4211 }
4212 }
4213
4214 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4215   // Note: if heap poisoning is enabled, the entry point takes care
4216 // of poisoning the reference.
4217 if (instruction->IsStringAlloc()) {
4218 // String is allocated through StringFactory. Call NewEmptyString entry point.
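    // The entry point is a thread-local slot reached through the FS segment on
    // x86: the movl below loads the ArtMethod* for NewEmptyString from the
    // Thread, and the call then jumps through that method's quick-compiled code
    // pointer. (Explanatory note.)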
4219 Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
4220 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
4221 __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
4222 __ call(Address(temp, code_offset.Int32Value()));
4223 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4224 } else {
4225 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4226 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4227 DCHECK(!codegen_->IsLeafMethod());
4228 }
4229 }
4230
4231 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4232 LocationSummary* locations =
4233 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4234 locations->SetOut(Location::RegisterLocation(EAX));
4235 InvokeRuntimeCallingConvention calling_convention;
4236 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4237 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4238 }
4239
4240 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4241   // Note: if heap poisoning is enabled, the entry point takes care
4242 // of poisoning the reference.
4243 QuickEntrypointEnum entrypoint =
4244 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4245 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4246 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4247 DCHECK(!codegen_->IsLeafMethod());
4248 }
4249
4250 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4251 LocationSummary* locations =
4252 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4253 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4254 if (location.IsStackSlot()) {
4255 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4256 } else if (location.IsDoubleStackSlot()) {
4257 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4258 }
4259 locations->SetOut(location);
4260 }
4261
4262 void InstructionCodeGeneratorX86::VisitParameterValue(
4263 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4264 }
4265
VisitCurrentMethod(HCurrentMethod * instruction)4266 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4267 LocationSummary* locations =
4268 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4269 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4270 }
4271
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4272 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4273 }
4274
VisitClassTableGet(HClassTableGet * instruction)4275 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4276 LocationSummary* locations =
4277 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4278 locations->SetInAt(0, Location::RequiresRegister());
4279 locations->SetOut(Location::RequiresRegister());
4280 }
4281
VisitClassTableGet(HClassTableGet * instruction)4282 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4283 LocationSummary* locations = instruction->GetLocations();
4284 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4285 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4286 instruction->GetIndex(), kX86PointerSize).SizeValue();
4287 __ movl(locations->Out().AsRegister<Register>(),
4288 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4289 } else {
4290 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4291 instruction->GetIndex(), kX86PointerSize));
4292 __ movl(locations->Out().AsRegister<Register>(),
4293 Address(locations->InAt(0).AsRegister<Register>(),
4294 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4295 // temp = temp->GetImtEntryAt(method_offset);
4296 __ movl(locations->Out().AsRegister<Register>(),
4297 Address(locations->Out().AsRegister<Register>(), method_offset));
4298 }
4299 }
4300
VisitNot(HNot * not_)4301 void LocationsBuilderX86::VisitNot(HNot* not_) {
4302 LocationSummary* locations =
4303 new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4304 locations->SetInAt(0, Location::RequiresRegister());
4305 locations->SetOut(Location::SameAsFirstInput());
4306 }
4307
VisitNot(HNot * not_)4308 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4309 LocationSummary* locations = not_->GetLocations();
4310 Location in = locations->InAt(0);
4311 Location out = locations->Out();
4312 DCHECK(in.Equals(out));
4313 switch (not_->GetResultType()) {
4314 case Primitive::kPrimInt:
4315 __ notl(out.AsRegister<Register>());
4316 break;
4317
4318 case Primitive::kPrimLong:
4319 __ notl(out.AsRegisterPairLow<Register>());
4320 __ notl(out.AsRegisterPairHigh<Register>());
4321 break;
4322
4323 default:
4324 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4325 }
4326 }
4327
VisitBooleanNot(HBooleanNot * bool_not)4328 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4329 LocationSummary* locations =
4330 new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4331 locations->SetInAt(0, Location::RequiresRegister());
4332 locations->SetOut(Location::SameAsFirstInput());
4333 }
4334
VisitBooleanNot(HBooleanNot * bool_not)4335 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4336 LocationSummary* locations = bool_not->GetLocations();
4337 Location in = locations->InAt(0);
4338 Location out = locations->Out();
4339 DCHECK(in.Equals(out));
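  // Booleans are represented as 0 or 1, so flipping bit 0 with XOR
  // implements logical negation in a single instruction.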
  __ xorl(out.AsRegister<Register>(), Immediate(1));
}

void LocationsBuilderX86::VisitCompare(HCompare* compare) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
  switch (compare->InputAt(0)->GetType()) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimShort:
    case Primitive::kPrimChar:
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
        DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
      } else if (compare->InputAt(1)->IsConstant()) {
        locations->SetInAt(1, Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(1, Location::Any());
      }
      locations->SetOut(Location::RequiresRegister());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
  LocationSummary* locations = compare->GetLocations();
  Register out = locations->Out().AsRegister<Register>();
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);

  NearLabel less, greater, done;
  Condition less_cond = kLess;

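  // HCompare produces -1, 0 or 1. For longs, the high words are compared
  // first with a signed compare; if they are equal, the low words are
  // compared as unsigned, which is why `less_cond` switches to kBelow. For
  // floats and doubles, an unordered comparison (a NaN operand) is routed
  // to `greater` or `less` according to the compare's gt/lt bias.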
  switch (compare->InputAt(0)->GetType()) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimShort:
    case Primitive::kPrimChar:
    case Primitive::kPrimInt: {
      codegen_->GenerateIntCompare(left, right);
      break;
    }
    case Primitive::kPrimLong: {
      Register left_low = left.AsRegisterPairLow<Register>();
      Register left_high = left.AsRegisterPairHigh<Register>();
      int32_t val_low = 0;
      int32_t val_high = 0;
      bool right_is_const = false;

      if (right.IsConstant()) {
        DCHECK(right.GetConstant()->IsLongConstant());
        right_is_const = true;
        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
        val_low = Low32Bits(val);
        val_high = High32Bits(val);
      }

      if (right.IsRegisterPair()) {
        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_high, val_high);
      }
      __ j(kLess, &less);  // Signed compare.
      __ j(kGreater, &greater);  // Signed compare.
      if (right.IsRegisterPair()) {
        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_low, val_low);
      }
      less_cond = kBelow;  // for CF (unsigned).
      break;
    }
    case Primitive::kPrimFloat: {
      GenerateFPCompare(left, right, compare, false);
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    case Primitive::kPrimDouble: {
      GenerateFPCompare(left, right, compare, true);
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
  }

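  // Materialize the result: 0 if the operands compared equal, 1 on the
  // `greater` path, -1 on the `less` path.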
  __ movl(out, Immediate(0));
  __ j(kEqual, &done);
  __ j(less_cond, &less);

  __ Bind(&greater);
  __ movl(out, Immediate(1));
  __ jmp(&done);

  __ Bind(&less);
  __ movl(out, Immediate(-1));

  __ Bind(&done);
}

void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
    locations->SetInAt(i, Location::Any());
  }
  locations->SetOut(Location::Any());
}

void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
  /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
  switch (kind) {
    case MemBarrierKind::kAnyAny: {
      MemoryFence();
      break;
    }
    case MemBarrierKind::kAnyStore:
    case MemBarrierKind::kLoadAny:
    case MemBarrierKind::kStoreStore: {
      // nop
      break;
    }
    case MemBarrierKind::kNTStoreStore:
      // Non-Temporal Store/Store needs an explicit fence.
      MemoryFence(/* non-temporal */ true);
      break;
  }
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 Register temp) {
  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
  if (!invoke->GetLocations()->Intrinsified()) {
    return location.AsRegister<Register>();
  }
  // For intrinsics we allow any location, so it may be on the stack.
  if (!location.IsRegister()) {
    __ movl(temp, Address(ESP, location.GetStackIndex()));
    return temp;
  }
  // For register locations, check if the register was saved. If so, get it from the stack.
  // Note: There is a chance that the register was saved but not overwritten, so we could
  // save one load. However, since this is just an intrinsic slow path, we prefer this
  // simple and more robust approach rather than trying to determine if that's the case.
  SlowPathCode* slow_path = GetCurrentSlowPath();
  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
    __ movl(temp, Address(ESP, stack_offset));
    return temp;
  }
  return location.AsRegister<Register>();
}

void CodeGeneratorX86::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage());
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
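      // The 32-bit displacement in the leal above is only a placeholder
      // (kDummy32BitOffset); RecordBootMethodPatch binds a label here so the
      // linker can later patch in the real PC-relative offset of the target
      // method (see EmitPcRelativeLinkerPatches below).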
      RecordBootMethodPatch(invoke);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
      // Bind a new fixup label at the end of the "movl" insn.
      __ Bind(NewMethodBssEntryPatch(
          invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(GetFrameEntryLabel());
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<Register>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86PointerSize).Int32Value()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  Register temp = temp_in.AsRegister<Register>();
  uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(receiver, class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetMethodAt(method_offset);
  __ movl(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(
      temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
  HX86ComputeBaseMethodAddress* address =
      invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
  boot_image_method_patches_.emplace_back(address,
                                          *invoke->GetTargetMethod().dex_file,
                                          invoke->GetTargetMethod().dex_method_index);
  __ Bind(&boot_image_method_patches_.back().label);
}

Label* CodeGeneratorX86::NewMethodBssEntryPatch(
    HX86ComputeBaseMethodAddress* method_address,
    MethodReference target_method) {
  // Add the patch entry and bind its label at the end of the instruction.
  method_bss_entry_patches_.emplace_back(method_address,
                                         *target_method.dex_file,
                                         target_method.dex_method_index);
  return &method_bss_entry_patches_.back().label;
}

void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
  HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
  boot_image_type_patches_.emplace_back(address,
                                        load_class->GetDexFile(),
                                        load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
  HX86ComputeBaseMethodAddress* address =
      load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
  type_bss_entry_patches_.emplace_back(
      address, load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &type_bss_entry_patches_.back().label;
}

void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
  DCHECK(GetCompilerOptions().IsBootImage());
  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
  string_patches_.emplace_back(address,
                               load_string->GetDexFile(),
                               load_string->GetStringIndex().index_);
  __ Bind(&string_patches_.back().label);
}

Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
  DCHECK(!GetCompilerOptions().IsBootImage());
  HX86ComputeBaseMethodAddress* address =
      load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
  string_patches_.emplace_back(
      address, load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_patches_.back().label;
}

// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
    const ArenaDeque<X86PcRelativePatchInfo>& infos,
    ArenaVector<LinkerPatch>* linker_patches) {
  for (const X86PcRelativePatchInfo& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(Factory(
        literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index));
  }
}

void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      boot_image_method_patches_.size() +
      method_bss_entry_patches_.size() +
      boot_image_type_patches_.size() +
      type_bss_entry_patches_.size() +
      string_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
                                                                  linker_patches);
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                linker_patches);
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
  } else {
    DCHECK(boot_image_method_patches_.empty());
    DCHECK(boot_image_type_patches_.empty());
    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
  }
  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
                                                                linker_patches);
  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                              linker_patches);
  DCHECK_EQ(size, linker_patches->size());
}

void CodeGeneratorX86::MarkGCCard(Register temp,
                                  Register card,
                                  Register object,
                                  Register value,
                                  bool value_can_be_null) {
  NearLabel is_null;
  if (value_can_be_null) {
    __ testl(value, value);
    __ j(kEqual, &is_null);
  }
  __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
  __ movl(temp, object);
  __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
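  // Write the low byte of the card-table base into the card for `object`
  // (located at `card + (object >> kCardShift)`). This relies on the card
  // table base being biased at creation so that its least-significant byte
  // equals the card-dirty value; see gc::accounting::CardTable.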
  __ movb(Address(temp, card, TIMES_1, 0),
          X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
  if (value_can_be_null) {
    __ Bind(&is_null);
  }
}

void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  bool object_field_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   kEmitCompilerReadBarrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());

  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister());
  } else {
    // The output overlaps in case of long: we don't want the low move
    // to overwrite the object's location. Likewise, in the case of
    // an object field get with read barriers enabled, we do not want
    // the move to overwrite the object's location, as we need it to emit
    // the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ?
            Location::kOutputOverlap :
            Location::kNoOutputOverlap);
  }

  if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
    // Long values can be loaded atomically into an XMM using movsd.
    // So we use an XMM register as a temp to achieve atomicity (first
    // load the temp into the XMM and then copy the XMM into the
    // output, 32 bits at a time).
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
                                                 const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  LocationSummary* locations = instruction->GetLocations();
  Location base_loc = locations->InAt(0);
  Register base = base_loc.AsRegister<Register>();
  Location out = locations->Out();
  bool is_volatile = field_info.IsVolatile();
  Primitive::Type field_type = field_info.GetFieldType();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();

  switch (field_type) {
    case Primitive::kPrimBoolean: {
      __ movzxb(out.AsRegister<Register>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimByte: {
      __ movsxb(out.AsRegister<Register>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimShort: {
      __ movsxw(out.AsRegister<Register>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimChar: {
      __ movzxw(out.AsRegister<Register>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimInt:
      __ movl(out.AsRegister<Register>(), Address(base, offset));
      break;

    case Primitive::kPrimNot: {
      // /* HeapReference<Object> */ out = *(base + offset)
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            instruction, out, base, offset, /* needs_null_check */ true);
        if (is_volatile) {
          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
        }
      } else {
        __ movl(out.AsRegister<Register>(), Address(base, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        if (is_volatile) {
          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
        }
        // If read barriers are enabled, emit read barriers other than
        // Baker's using a slow path (and also unpoison the loaded
        // reference, if heap poisoning is enabled).
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
      }
      break;
    }

    case Primitive::kPrimLong: {
      if (is_volatile) {
        XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
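        // Perform the 64-bit load as a single atomic movsd into the XMM
        // temp, then split it into the output register pair 32 bits at a
        // time (movd for the low word, psrlq + movd for the high word).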
        __ movsd(temp, Address(base, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ movd(out.AsRegisterPairLow<Register>(), temp);
        __ psrlq(temp, Immediate(32));
        __ movd(out.AsRegisterPairHigh<Register>(), temp);
      } else {
        DCHECK_NE(base, out.AsRegisterPairLow<Register>());
        __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
      }
      break;
    }

    case Primitive::kPrimFloat: {
      __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimDouble: {
      __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << field_type;
      UNREACHABLE();
  }

  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
    // Potential implicit null checks, in the case of reference or
    // long fields, are handled in the previous switch statement.
  } else {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }

  if (is_volatile) {
    if (field_type == Primitive::kPrimNot) {
      // Memory barriers, in the case of references, are also handled
      // in the previous switch statement.
    } else {
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
    }
  }
}

void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());

  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  bool is_volatile = field_info.IsVolatile();
  Primitive::Type field_type = field_info.GetFieldType();
  bool is_byte_type = (field_type == Primitive::kPrimBoolean)
      || (field_type == Primitive::kPrimByte);

  // The register allocator does not support multiple
  // inputs that die at entry with one in a specific register.
  if (is_byte_type) {
    // Ensure the value is in a byte register.
    locations->SetInAt(1, Location::RegisterLocation(EAX));
  } else if (Primitive::IsFloatingPointType(field_type)) {
    if (is_volatile && field_type == Primitive::kPrimDouble) {
      // In order to satisfy the semantics of volatile, this must be a single instruction store.
      locations->SetInAt(1, Location::RequiresFpuRegister());
    } else {
      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
    }
  } else if (is_volatile && field_type == Primitive::kPrimLong) {
    // In order to satisfy the semantics of volatile, this must be a single instruction store.
    locations->SetInAt(1, Location::RequiresRegister());

    // A 64-bit value can be atomically written to an address with movsd and an XMM register.
    // We need two XMM registers because there's no easier way to (bit) copy a register pair
    // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
    // NB: We could make the register allocator understand fp_reg <-> core_reg moves, but given
    // the isolated cases in which we need this, it isn't worth adding the extra complexity.
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));

    if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
      // Temporary registers for the write barrier.
      locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
      // Ensure the card is in a byte register.
      locations->AddTemp(Location::RegisterLocation(ECX));
    }
  }
}

void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
                                                 const FieldInfo& field_info,
                                                 bool value_can_be_null) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());

  LocationSummary* locations = instruction->GetLocations();
  Register base = locations->InAt(0).AsRegister<Register>();
  Location value = locations->InAt(1);
  bool is_volatile = field_info.IsVolatile();
  Primitive::Type field_type = field_info.GetFieldType();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));

  if (is_volatile) {
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
  }

  bool maybe_record_implicit_null_check_done = false;

  switch (field_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte: {
      __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
      break;
    }

    case Primitive::kPrimShort:
    case Primitive::kPrimChar: {
      if (value.IsConstant()) {
        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movw(Address(base, offset), Immediate(v));
      } else {
        __ movw(Address(base, offset), value.AsRegister<Register>());
      }
      break;
    }

    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      if (kPoisonHeapReferences && needs_write_barrier) {
        // Note that in the case where `value` is a null reference,
        // we do not enter this block, as the reference does not
        // need poisoning.
        DCHECK_EQ(field_type, Primitive::kPrimNot);
        Register temp = locations->GetTemp(0).AsRegister<Register>();
        __ movl(temp, value.AsRegister<Register>());
        __ PoisonHeapReference(temp);
        __ movl(Address(base, offset), temp);
      } else if (value.IsConstant()) {
        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movl(Address(base, offset), Immediate(v));
      } else {
        DCHECK(value.IsRegister()) << value;
        __ movl(Address(base, offset), value.AsRegister<Register>());
      }
      break;
    }

    case Primitive::kPrimLong: {
      if (is_volatile) {
        XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
        XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
        __ movd(temp1, value.AsRegisterPairLow<Register>());
        __ movd(temp2, value.AsRegisterPairHigh<Register>());
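        // punpckldq interleaves the low doublewords of temp1 and temp2,
        // producing the full 64-bit value in temp1 so that the store below
        // is a single atomic movsd.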
        __ punpckldq(temp1, temp2);
        __ movsd(Address(base, offset), temp1);
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      } else if (value.IsConstant()) {
        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
      } else {
        __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
      }
      maybe_record_implicit_null_check_done = true;
      break;
    }

    case Primitive::kPrimFloat: {
      if (value.IsConstant()) {
        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
        __ movl(Address(base, offset), Immediate(v));
      } else {
        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
      }
      break;
    }

    case Primitive::kPrimDouble: {
      if (value.IsConstant()) {
        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
        maybe_record_implicit_null_check_done = true;
      } else {
        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << field_type;
      UNREACHABLE();
  }

  if (!maybe_record_implicit_null_check_done) {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }

  if (needs_write_barrier) {
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    Register card = locations->GetTemp(1).AsRegister<Register>();
    codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
  }

  if (is_volatile) {
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
  }
}

void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
  HandleFieldGet(instruction, instruction->GetFieldInfo());
}

void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
  HandleFieldGet(instruction, instruction->GetFieldInfo());
}

void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSet(instruction, instruction->GetFieldInfo());
}

void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}

void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
  HandleFieldSet(instruction, instruction->GetFieldInfo());
}

void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}

void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
  HandleFieldGet(instruction, instruction->GetFieldInfo());
}

void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
  HandleFieldGet(instruction, instruction->GetFieldInfo());
}

void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionX86 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
  Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
      ? Location::RequiresRegister()
      : Location::Any();
  locations->SetInAt(0, loc);
}

void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
  if (CanMoveNullCheckToUser(instruction)) {
    return;
  }
  LocationSummary* locations = instruction->GetLocations();
  Location obj = locations->InAt(0);

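  // A load from the object at offset 0 faults if the object is null; the
  // runtime's fault handler recognizes the faulting PC (via the PC info
  // recorded below) and raises a NullPointerException instead of crashing.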
  __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
  RecordPcInfo(instruction, instruction->GetDexPc());
}

void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
  AddSlowPath(slow_path);

  LocationSummary* locations = instruction->GetLocations();
  Location obj = locations->InAt(0);

  if (obj.IsRegister()) {
    __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
  } else if (obj.IsStackSlot()) {
    __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
  } else {
    DCHECK(obj.IsConstant()) << obj;
    DCHECK(obj.GetConstant()->IsNullConstant());
    __ jmp(slow_path->GetEntryLabel());
    return;
  }
  __ j(kEqual, slow_path->GetEntryLabel());
}

void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
  codegen_->GenerateNullCheck(instruction);
}

void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
  bool object_array_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_array_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
  } else {
    // The output overlaps in case of long: we don't want the low move
    // to overwrite the array's location. Likewise, in the case of an
    // object array get with read barriers enabled, we do not want the
    // move to overwrite the array's location, as we need it to emit
    // the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ?
            Location::kOutputOverlap :
            Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location index = locations->InAt(1);
  Location out_loc = locations->Out();
  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);

  Primitive::Type type = instruction->GetType();
  switch (type) {
    case Primitive::kPrimBoolean: {
      Register out = out_loc.AsRegister<Register>();
      __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
      break;
    }

    case Primitive::kPrimByte: {
      Register out = out_loc.AsRegister<Register>();
      __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
      break;
    }

    case Primitive::kPrimShort: {
      Register out = out_loc.AsRegister<Register>();
      __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
      break;
    }

    case Primitive::kPrimChar: {
      Register out = out_loc.AsRegister<Register>();
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        // Branch into the compressed or uncompressed case depending on the
        // string's compression flag (bit 0 of the count field).
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        NearLabel done, not_compressed;
        __ testb(Address(obj, count_offset), Immediate(1));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        __ j(kNotZero, &not_compressed);
        __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
        __ jmp(&done);
        __ Bind(&not_compressed);
        __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
        __ Bind(&done);
      } else {
        // Common case: charAt on an array of char, or string compression
        // is disabled.
        __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
      }
      break;
    }

    case Primitive::kPrimInt: {
      Register out = out_loc.AsRegister<Register>();
      __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
      break;
    }

    case Primitive::kPrimNot: {
      static_assert(
          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
      // /* HeapReference<Object> */ out =
      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
        codegen_->GenerateArrayLoadWithBakerReadBarrier(
            instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
      } else {
        Register out = out_loc.AsRegister<Register>();
        __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        // If read barriers are enabled, emit read barriers other than
        // Baker's using a slow path (and also unpoison the loaded
        // reference, if heap poisoning is enabled).
        if (index.IsConstant()) {
          uint32_t offset =
              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
        } else {
          codegen_->MaybeGenerateReadBarrierSlow(
              instruction, out_loc, out_loc, obj_loc, data_offset, index);
        }
      }
      break;
    }

    case Primitive::kPrimLong: {
      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
      __ movl(out_loc.AsRegisterPairLow<Register>(),
              CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      __ movl(out_loc.AsRegisterPairHigh<Register>(),
              CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
      break;
    }

    case Primitive::kPrimFloat: {
      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
      __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
      break;
    }

    case Primitive::kPrimDouble: {
      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
      __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }

  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
    // Potential implicit null checks, in the case of reference or
    // long arrays, are handled in the previous switch statement.
  } else {
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  }
}

void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();

  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();

  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
      instruction,
      may_need_runtime_call_for_type_check ?
          LocationSummary::kCallOnSlowPath :
          LocationSummary::kNoCall);

  bool is_byte_type = (value_type == Primitive::kPrimBoolean)
      || (value_type == Primitive::kPrimByte);
  // We need the inputs to be different from the output in the case of a long operation.
  // In the case of a byte operation, the register allocator does not support multiple
  // inputs that die at entry with one in a specific register.
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (is_byte_type) {
    // Ensure the value is in a byte register.
    locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
  } else if (Primitive::IsFloatingPointType(value_type)) {
    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
  } else {
    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
  }
  if (needs_write_barrier) {
    // Temporary registers for the write barrier.
    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
    // Ensure the card is in a byte register.
    locations->AddTemp(Location::RegisterLocation(ECX));
  }
}

void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location array_loc = locations->InAt(0);
  Register array = array_loc.AsRegister<Register>();
  Location index = locations->InAt(1);
  Location value = locations->InAt(2);
  Primitive::Type value_type = instruction->GetComponentType();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());

  switch (value_type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
      Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
      if (value.IsRegister()) {
        __ movb(address, value.AsRegister<ByteRegister>());
      } else {
        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimShort:
    case Primitive::kPrimChar: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
      Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
      if (value.IsRegister()) {
        __ movw(address, value.AsRegister<Register>());
      } else {
        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
      }
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      break;
    }

    case Primitive::kPrimNot: {
      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
      Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);

      if (!value.IsRegister()) {
        // Just setting null.
        DCHECK(instruction->InputAt(2)->IsNullConstant());
        DCHECK(value.IsConstant()) << value;
        __ movl(address, Immediate(0));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        DCHECK(!needs_write_barrier);
        DCHECK(!may_need_runtime_call_for_type_check);
        break;
      }

      DCHECK(needs_write_barrier);
      Register register_value = value.AsRegister<Register>();
      // We cannot use a NearLabel for `done`, as its range may be too
      // short when Baker read barriers are enabled.
      Label done;
      NearLabel not_null, do_put;
      SlowPathCode* slow_path = nullptr;
      Location temp_loc = locations->GetTemp(0);
      Register temp = temp_loc.AsRegister<Register>();
      if (may_need_runtime_call_for_type_check) {
        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
        codegen_->AddSlowPath(slow_path);
        if (instruction->GetValueCanBeNull()) {
          __ testl(register_value, register_value);
          __ j(kNotEqual, &not_null);
          __ movl(address, Immediate(0));
          codegen_->MaybeRecordImplicitNullCheck(instruction);
          __ jmp(&done);
          __ Bind(&not_null);
5460 }
5461
5462 // Note that when Baker read barriers are enabled, the type
5463 // checks are performed without read barriers. This is fine,
5464 // even in the case where a class object is in the from-space
5465 // after the flip, as a comparison involving such a type would
5466 // not produce a false positive; it may of course produce a
5467 // false negative, in which case we would take the ArraySet
5468 // slow path.
5469
5470 // /* HeapReference<Class> */ temp = array->klass_
5471 __ movl(temp, Address(array, class_offset));
5472 codegen_->MaybeRecordImplicitNullCheck(instruction);
5473 __ MaybeUnpoisonHeapReference(temp);
5474
5475 // /* HeapReference<Class> */ temp = temp->component_type_
5476 __ movl(temp, Address(temp, component_offset));
5477 // If heap poisoning is enabled, no need to unpoison `temp`
5478 // nor the object reference in `register_value->klass`, as
5479 // we are comparing two poisoned references.
5480 __ cmpl(temp, Address(register_value, class_offset));
5481
5482 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5483 __ j(kEqual, &do_put);
5484 // If heap poisoning is enabled, the `temp` reference has
5485 // not been unpoisoned yet; unpoison it now.
5486 __ MaybeUnpoisonHeapReference(temp);
5487
5488 // If heap poisoning is enabled, no need to unpoison the
5489 // heap reference loaded below, as it is only used for a
5490 // comparison with null.
5491 __ cmpl(Address(temp, super_offset), Immediate(0));
5492 __ j(kNotEqual, slow_path->GetEntryLabel());
5493 __ Bind(&do_put);
5494 } else {
5495 __ j(kNotEqual, slow_path->GetEntryLabel());
5496 }
5497 }
5498
5499 if (kPoisonHeapReferences) {
5500 __ movl(temp, register_value);
5501 __ PoisonHeapReference(temp);
5502 __ movl(address, temp);
5503 } else {
5504 __ movl(address, register_value);
5505 }
5506 if (!may_need_runtime_call_for_type_check) {
5507 codegen_->MaybeRecordImplicitNullCheck(instruction);
5508 }
5509
5510 Register card = locations->GetTemp(1).AsRegister<Register>();
5511 codegen_->MarkGCCard(
5512 temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
5513 __ Bind(&done);
5514
5515 if (slow_path != nullptr) {
5516 __ Bind(slow_path->GetExitLabel());
5517 }
5518
5519 break;
5520 }
5521
5522 case Primitive::kPrimInt: {
5523 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5524 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5525 if (value.IsRegister()) {
5526 __ movl(address, value.AsRegister<Register>());
5527 } else {
5528 DCHECK(value.IsConstant()) << value;
5529 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5530 __ movl(address, Immediate(v));
5531 }
5532 codegen_->MaybeRecordImplicitNullCheck(instruction);
5533 break;
5534 }
5535
5536 case Primitive::kPrimLong: {
5537 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5538 if (value.IsRegisterPair()) {
5539 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5540 value.AsRegisterPairLow<Register>());
5541 codegen_->MaybeRecordImplicitNullCheck(instruction);
5542 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5543 value.AsRegisterPairHigh<Register>());
5544 } else {
5545 DCHECK(value.IsConstant());
5546 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
5547 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5548 Immediate(Low32Bits(val)));
5549 codegen_->MaybeRecordImplicitNullCheck(instruction);
5550 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5551 Immediate(High32Bits(val)));
5552 }
5553 break;
5554 }
5555
5556 case Primitive::kPrimFloat: {
5557 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5558 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5559 if (value.IsFpuRegister()) {
5560 __ movss(address, value.AsFpuRegister<XmmRegister>());
5561 } else {
5562 DCHECK(value.IsConstant());
5563 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5564 __ movl(address, Immediate(v));
5565 }
5566 codegen_->MaybeRecordImplicitNullCheck(instruction);
5567 break;
5568 }
5569
5570 case Primitive::kPrimDouble: {
5571 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5572 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
5573 if (value.IsFpuRegister()) {
5574 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5575 } else {
5576 DCHECK(value.IsConstant());
5577 Address address_hi =
5578 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
5579 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5580 __ movl(address, Immediate(Low32Bits(v)));
5581 codegen_->MaybeRecordImplicitNullCheck(instruction);
5582 __ movl(address_hi, Immediate(High32Bits(v)));
5583 }
5584 break;
5585 }
5586
5587 case Primitive::kPrimVoid:
5588 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5589 UNREACHABLE();
5590 }
5591 }
5592
5593 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
5594 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5595 locations->SetInAt(0, Location::RequiresRegister());
5596 if (!instruction->IsEmittedAtUseSite()) {
5597 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5598 }
5599 }
5600
5601 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
5602 if (instruction->IsEmittedAtUseSite()) {
5603 return;
5604 }
5605
5606 LocationSummary* locations = instruction->GetLocations();
5607 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5608 Register obj = locations->InAt(0).AsRegister<Register>();
5609 Register out = locations->Out().AsRegister<Register>();
5610 __ movl(out, Address(obj, offset));
5611 codegen_->MaybeRecordImplicitNullCheck(instruction);
5612 // If this is a String's length, shift out the compression flag stored in the least significant bit.
5613 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5614 __ shrl(out, Immediate(1));
5615 }
5616 }
5617
5618 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5619 RegisterSet caller_saves = RegisterSet::Empty();
5620 InvokeRuntimeCallingConvention calling_convention;
5621 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5622 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5623 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5624 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5625 HInstruction* length = instruction->InputAt(1);
5626 if (!length->IsEmittedAtUseSite()) {
5627 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5628 }
5629 // Need a temporary register to load the String's length (and strip the compression flag).
5630 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5631 locations->AddTemp(Location::RequiresRegister());
5632 }
5633 }
5634
5635 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5636 const bool is_string_compressed_char_at =
5637 mirror::kUseStringCompression && instruction->IsStringCharAt();
5638 LocationSummary* locations = instruction->GetLocations();
5639 Location index_loc = locations->InAt(0);
5640 Location length_loc = locations->InAt(1);
5641 SlowPathCode* slow_path =
5642 new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction);
5643
5644 if (length_loc.IsConstant()) {
5645 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5646 if (index_loc.IsConstant()) {
5647 // BCE will remove the bounds check if we are guaranteed to pass.
5648 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5649 if (index < 0 || index >= length) {
5650 codegen_->AddSlowPath(slow_path);
5651 __ jmp(slow_path->GetEntryLabel());
5652 } else {
5653 // Some optimization after BCE may have generated this; the index is known
5654 // to be in range, so no bounds check needs to be emitted.
5655 }
5656 return;
5657 }
5658
5659 // We have to reverse the jump condition because the length is the constant.
5660 Register index_reg = index_loc.AsRegister<Register>();
5661 __ cmpl(index_reg, Immediate(length));
5662 codegen_->AddSlowPath(slow_path);
5663 __ j(kAboveEqual, slow_path->GetEntryLabel());
5664 } else {
5665 HInstruction* array_length = instruction->InputAt(1);
5666 if (array_length->IsEmittedAtUseSite()) {
5667 // Address the length field in the array.
5668 DCHECK(array_length->IsArrayLength());
5669 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5670 Location array_loc = array_length->GetLocations()->InAt(0);
5671 Address array_len(array_loc.AsRegister<Register>(), len_offset);
5672 if (is_string_compressed_char_at) {
5673 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5674 // the string compression flag) with the in-memory length and avoid the temporary.
5675 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
5676 __ movl(length_reg, array_len);
5677 codegen_->MaybeRecordImplicitNullCheck(array_length);
5678 __ shrl(length_reg, Immediate(1));
5679 codegen_->GenerateIntCompare(length_reg, index_loc);
5680 } else {
5681 // Checking bounds for the general case: a plain char array, or a String's
5682 // backing array when compression is off.
5683 if (index_loc.IsConstant()) {
5684 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5685 __ cmpl(array_len, Immediate(value));
5686 } else {
5687 __ cmpl(array_len, index_loc.AsRegister<Register>());
5688 }
5689 codegen_->MaybeRecordImplicitNullCheck(array_length);
5690 }
5691 } else {
5692 codegen_->GenerateIntCompare(length_loc, index_loc);
5693 }
5694 codegen_->AddSlowPath(slow_path);
5695 __ j(kBelowEqual, slow_path->GetEntryLabel());
5696 }
5697 }
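// For reference, the register-register bounds check emitted above is roughly:
//   cmp length, index              ; GenerateIntCompare(length_loc, index_loc)
//   jbe <BoundsCheckSlowPathX86>   ; unsigned: taken when index >= length
// The single unsigned branch also catches negative indices, since a negative
// index reinterpreted as unsigned is larger than any valid array length.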
5698
5699 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5700 LOG(FATAL) << "Unreachable";
5701 }
5702
5703 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
5704 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5705 }
5706
5707 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5708 LocationSummary* locations =
5709 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5710 // In the suspend check slow path, there are usually no caller-save registers at all.
5711 // If SIMD instructions are present, however, we force spilling all live SIMD
5712 // registers in full width (since the runtime only saves/restores the lower part).
5713 locations->SetCustomSlowPathCallerSaves(
5714 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5715 }
5716
5717 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5718 HBasicBlock* block = instruction->GetBlock();
5719 if (block->GetLoopInformation() != nullptr) {
5720 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5721 // The back edge will generate the suspend check.
5722 return;
5723 }
5724 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5725 // The goto will generate the suspend check.
5726 return;
5727 }
5728 GenerateSuspendCheck(instruction, nullptr);
5729 }
5730
5731 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
5732 HBasicBlock* successor) {
5733 SuspendCheckSlowPathX86* slow_path =
5734 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
5735 if (slow_path == nullptr) {
5736 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
5737 instruction->SetSlowPath(slow_path);
5738 codegen_->AddSlowPath(slow_path);
5739 if (successor != nullptr) {
5740 DCHECK(successor->IsLoopHeader());
5741 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5742 }
5743 } else {
5744 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5745 }
5746
5747 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
5748 Immediate(0));
5749 if (successor == nullptr) {
5750 __ j(kNotEqual, slow_path->GetEntryLabel());
5751 __ Bind(slow_path->GetReturnLabel());
5752 } else {
5753 __ j(kEqual, codegen_->GetLabelOf(successor));
5754 __ jmp(slow_path->GetEntryLabel());
5755 }
5756 }
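// Note on the check above: the 16-bit thread flags live at a fixed offset from
// the FS segment base (which points at the current Thread), so one cmpw against
// zero detects pending suspend or checkpoint requests. On a back edge the branch
// polarity is inverted: the common "no request" case jumps straight to the loop
// header and the fall-through enters the slow path.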
5757
5758 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
5759 return codegen_->GetAssembler();
5760 }
5761
5762 void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
5763 ScratchRegisterScope ensure_scratch(
5764 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5765 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5766 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5767 __ movl(temp_reg, Address(ESP, src + stack_offset));
5768 __ movl(Address(ESP, dst + stack_offset), temp_reg);
5769 }
5770
5771 void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
5772 ScratchRegisterScope ensure_scratch(
5773 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5774 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5775 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5776 __ movl(temp_reg, Address(ESP, src + stack_offset));
5777 __ movl(Address(ESP, dst + stack_offset), temp_reg);
5778 __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
5779 __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
5780 }
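// Note on `stack_offset` in the two helpers above: if no core register was free,
// the scratch register is spilled with a push, which moves ESP down by one word;
// every ESP-relative offset must then be rebased by kX86WordSize for as long as
// the scratch value lives on the stack.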
5781
5782 void ParallelMoveResolverX86::EmitMove(size_t index) {
5783 MoveOperands* move = moves_[index];
5784 Location source = move->GetSource();
5785 Location destination = move->GetDestination();
5786
5787 if (source.IsRegister()) {
5788 if (destination.IsRegister()) {
5789 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5790 } else if (destination.IsFpuRegister()) {
5791 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
5792 } else {
5793 DCHECK(destination.IsStackSlot());
5794 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
5795 }
5796 } else if (source.IsRegisterPair()) {
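// In this branch the destination is expected to be an XMM register. x86-32 has
// no direct GPR-pair-to-XMM move, so the two halves are staged in temporary
// stack slots and reloaded with a single 64-bit movsd.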
5797 size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
5798 // Create stack space for 2 elements.
5799 __ subl(ESP, Immediate(2 * elem_size));
5800 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
5801 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
5802 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
5803 // And remove the temporary stack space we allocated.
5804 __ addl(ESP, Immediate(2 * elem_size));
5805 } else if (source.IsFpuRegister()) {
5806 if (destination.IsRegister()) {
5807 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
5808 } else if (destination.IsFpuRegister()) {
5809 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5810 } else if (destination.IsRegisterPair()) {
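// The opposite direction needs no stack staging: movd extracts the low 32 bits,
// psrlq shifts the upper half into the low doubleword, and a second movd
// extracts it. Note that this clobbers the source XMM register.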
5811 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
5812 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
5813 __ psrlq(src_reg, Immediate(32));
5814 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
5815 } else if (destination.IsStackSlot()) {
5816 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5817 } else if (destination.IsDoubleStackSlot()) {
5818 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5819 } else {
5820 DCHECK(destination.IsSIMDStackSlot());
5821 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5822 }
5823 } else if (source.IsStackSlot()) {
5824 if (destination.IsRegister()) {
5825 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
5826 } else if (destination.IsFpuRegister()) {
5827 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5828 } else {
5829 DCHECK(destination.IsStackSlot());
5830 MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
5831 }
5832 } else if (source.IsDoubleStackSlot()) {
5833 if (destination.IsRegisterPair()) {
5834 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
5835 __ movl(destination.AsRegisterPairHigh<Register>(),
5836 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
5837 } else if (destination.IsFpuRegister()) {
5838 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5839 } else {
5840 DCHECK(destination.IsDoubleStackSlot()) << destination;
5841 MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
5842 }
5843 } else if (source.IsSIMDStackSlot()) {
5844 DCHECK(destination.IsFpuRegister());
5845 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5846 } else if (source.IsConstant()) {
5847 HConstant* constant = source.GetConstant();
5848 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5849 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5850 if (destination.IsRegister()) {
5851 if (value == 0) {
5852 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
5853 } else {
5854 __ movl(destination.AsRegister<Register>(), Immediate(value));
5855 }
5856 } else {
5857 DCHECK(destination.IsStackSlot()) << destination;
5858 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
5859 }
5860 } else if (constant->IsFloatConstant()) {
5861 float fp_value = constant->AsFloatConstant()->GetValue();
5862 int32_t value = bit_cast<int32_t, float>(fp_value);
5863 Immediate imm(value);
5864 if (destination.IsFpuRegister()) {
5865 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5866 if (value == 0) {
5867 // Easy handling of 0.0.
5868 __ xorps(dest, dest);
5869 } else {
5870 ScratchRegisterScope ensure_scratch(
5871 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5872 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
5873 __ movl(temp, Immediate(value));
5874 __ movd(dest, temp);
5875 }
5876 } else {
5877 DCHECK(destination.IsStackSlot()) << destination;
5878 __ movl(Address(ESP, destination.GetStackIndex()), imm);
5879 }
5880 } else if (constant->IsLongConstant()) {
5881 int64_t value = constant->AsLongConstant()->GetValue();
5882 int32_t low_value = Low32Bits(value);
5883 int32_t high_value = High32Bits(value);
5884 Immediate low(low_value);
5885 Immediate high(high_value);
5886 if (destination.IsDoubleStackSlot()) {
5887 __ movl(Address(ESP, destination.GetStackIndex()), low);
5888 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5889 } else {
5890 __ movl(destination.AsRegisterPairLow<Register>(), low);
5891 __ movl(destination.AsRegisterPairHigh<Register>(), high);
5892 }
5893 } else {
5894 DCHECK(constant->IsDoubleConstant());
5895 double dbl_value = constant->AsDoubleConstant()->GetValue();
5896 int64_t value = bit_cast<int64_t, double>(dbl_value);
5897 int32_t low_value = Low32Bits(value);
5898 int32_t high_value = High32Bits(value);
5899 Immediate low(low_value);
5900 Immediate high(high_value);
5901 if (destination.IsFpuRegister()) {
5902 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5903 if (value == 0) {
5904 // Easy handling of 0.0.
5905 __ xorpd(dest, dest);
5906 } else {
5907 __ pushl(high);
5908 __ pushl(low);
5909 __ movsd(dest, Address(ESP, 0));
5910 __ addl(ESP, Immediate(8));
5911 }
5912 } else {
5913 DCHECK(destination.IsDoubleStackSlot()) << destination;
5914 __ movl(Address(ESP, destination.GetStackIndex()), low);
5915 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5916 }
5917 }
5918 } else {
5919 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
5920 }
5921 }
5922
5923 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
5924 Register suggested_scratch = reg == EAX ? EBX : EAX;
5925 ScratchRegisterScope ensure_scratch(
5926 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5927
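// Swap `reg` with the stack slot through the scratch register:
// scratch <- [mem]; [mem] <- reg; reg <- scratch.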
5928 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5929 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
5930 __ movl(Address(ESP, mem + stack_offset), reg);
5931 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
5932 }
5933
5934 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
5935 ScratchRegisterScope ensure_scratch(
5936 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5937
5938 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5939 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5940 __ movl(temp_reg, Address(ESP, mem + stack_offset));
5941 __ movss(Address(ESP, mem + stack_offset), reg);
5942 __ movd(reg, temp_reg);
5943 }
5944
5945 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
5946 ScratchRegisterScope ensure_scratch1(
5947 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5948
5949 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
5950 ScratchRegisterScope ensure_scratch2(
5951 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5952
5953 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
5954 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
5955 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
5956 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
5957 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
5958 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
5959 }
5960
5961 void ParallelMoveResolverX86::EmitSwap(size_t index) {
5962 MoveOperands* move = moves_[index];
5963 Location source = move->GetSource();
5964 Location destination = move->GetDestination();
5965
5966 if (source.IsRegister() && destination.IsRegister()) {
5967 // Use the XOR swap algorithm to avoid a serializing XCHG instruction or the use of a temporary.
5968 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
5969 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5970 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
5971 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5972 } else if (source.IsRegister() && destination.IsStackSlot()) {
5973 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
5974 } else if (source.IsStackSlot() && destination.IsRegister()) {
5975 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
5976 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5977 Exchange(destination.GetStackIndex(), source.GetStackIndex());
5978 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5979 // Use XOR Swap algorithm to avoid a temporary.
5980 DCHECK_NE(source.reg(), destination.reg());
5981 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5982 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5983 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5984 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5985 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5986 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
5987 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5988 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5989 // Take advantage of the 16 bytes in the XMM register.
5990 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
5991 Address stack(ESP, destination.GetStackIndex());
5992 // Load the double into the high doubleword.
5993 __ movhpd(reg, stack);
5994
5995 // Store the low double into the destination.
5996 __ movsd(stack, reg);
5997
5998 // Move the high double to the low double.
5999 __ psrldq(reg, Immediate(8));
6000 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6001 // Take advantage of the 16 bytes in the XMM register.
6002 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6003 Address stack(ESP, source.GetStackIndex());
6004 // Load the double into the high doubleword.
6005 __ movhpd(reg, stack);
6006
6007 // Store the low double into the destination.
6008 __ movsd(stack, reg);
6009
6010 // Move the high double to the low double.
6011 __ psrldq(reg, Immediate(8));
6012 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6013 Exchange(destination.GetStackIndex(), source.GetStackIndex());
6014 Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize));
6015 } else {
6016 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6017 }
6018 }
6019
6020 void ParallelMoveResolverX86::SpillScratch(int reg) {
6021 __ pushl(static_cast<Register>(reg));
6022 }
6023
6024 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6025 __ popl(static_cast<Register>(reg));
6026 }
6027
6028 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6029 HLoadClass::LoadKind desired_class_load_kind) {
6030 switch (desired_class_load_kind) {
6031 case HLoadClass::LoadKind::kInvalid:
6032 LOG(FATAL) << "UNREACHABLE";
6033 UNREACHABLE();
6034 case HLoadClass::LoadKind::kReferrersClass:
6035 break;
6036 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6037 case HLoadClass::LoadKind::kBssEntry:
6038 DCHECK(!Runtime::Current()->UseJitCompilation());
6039 break;
6040 case HLoadClass::LoadKind::kJitTableAddress:
6041 DCHECK(Runtime::Current()->UseJitCompilation());
6042 break;
6043 case HLoadClass::LoadKind::kBootImageAddress:
6044 case HLoadClass::LoadKind::kRuntimeCall:
6045 break;
6046 }
6047 return desired_class_load_kind;
6048 }
6049
6050 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6051 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6052 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6053 InvokeRuntimeCallingConvention calling_convention;
6054 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6055 cls,
6056 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6057 Location::RegisterLocation(EAX));
6058 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6059 return;
6060 }
6061 DCHECK(!cls->NeedsAccessCheck());
6062
6063 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6064 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6065 ? LocationSummary::kCallOnSlowPath
6066 : LocationSummary::kNoCall;
6067 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
6068 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6069 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6070 }
6071
6072 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6073 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6074 load_kind == HLoadClass::LoadKind::kBssEntry) {
6075 locations->SetInAt(0, Location::RequiresRegister());
6076 }
6077 locations->SetOut(Location::RequiresRegister());
6078 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6079 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6080 // Rely on the type resolution and/or initialization to save everything.
6081 RegisterSet caller_saves = RegisterSet::Empty();
6082 InvokeRuntimeCallingConvention calling_convention;
6083 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6084 locations->SetCustomSlowPathCallerSaves(caller_saves);
6085 } else {
6086 // For non-Baker read barrier we have a temp-clobbering call.
6087 }
6088 }
6089 }
6090
6091 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6092 dex::TypeIndex dex_index,
6093 Handle<mirror::Class> handle) {
6094 jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index),
6095 reinterpret_cast64<uint64_t>(handle.GetReference()));
6096 // Add a patch entry and return the label.
6097 jit_class_patches_.emplace_back(dex_file, dex_index.index_);
6098 PatchInfo<Label>* info = &jit_class_patches_.back();
6099 return &info->label;
6100 }
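// The returned label marks the 32-bit displacement of the root load emitted by
// the caller; when the JIT commits the compiled method, that displacement is
// patched to point at this class's slot in the method's JIT root table (see
// EmitJitRootPatches).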
6101
6102 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6103 // move.
6104 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6105 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6106 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6107 codegen_->GenerateLoadClassRuntimeCall(cls);
6108 return;
6109 }
6110 DCHECK(!cls->NeedsAccessCheck());
6111
6112 LocationSummary* locations = cls->GetLocations();
6113 Location out_loc = locations->Out();
6114 Register out = out_loc.AsRegister<Register>();
6115
6116 bool generate_null_check = false;
6117 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6118 ? kWithoutReadBarrier
6119 : kCompilerReadBarrierOption;
6120 switch (load_kind) {
6121 case HLoadClass::LoadKind::kReferrersClass: {
6122 DCHECK(!cls->CanCallRuntime());
6123 DCHECK(!cls->MustGenerateClinitCheck());
6124 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6125 Register current_method = locations->InAt(0).AsRegister<Register>();
6126 GenerateGcRootFieldLoad(
6127 cls,
6128 out_loc,
6129 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6130 /* fixup_label */ nullptr,
6131 read_barrier_option);
6132 break;
6133 }
6134 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6135 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6136 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6137 Register method_address = locations->InAt(0).AsRegister<Register>();
6138 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6139 codegen_->RecordBootTypePatch(cls);
6140 break;
6141 }
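// Note: x86-32 has no PC-relative addressing, so `method_address` holds a base
// materialized earlier by HX86ComputeBaseMethodAddress, and kDummy32BitOffset is
// a placeholder displacement that RecordBootTypePatch registers for the boot
// image linker to rewrite.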
6142 case HLoadClass::LoadKind::kBootImageAddress: {
6143 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6144 uint32_t address = dchecked_integral_cast<uint32_t>(
6145 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
6146 DCHECK_NE(address, 0u);
6147 __ movl(out, Immediate(address));
6148 break;
6149 }
6150 case HLoadClass::LoadKind::kBssEntry: {
6151 Register method_address = locations->InAt(0).AsRegister<Register>();
6152 Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6153 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6154 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6155 generate_null_check = true;
6156 break;
6157 }
6158 case HLoadClass::LoadKind::kJitTableAddress: {
6159 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6160 Label* fixup_label = codegen_->NewJitRootClassPatch(
6161 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6162 // /* GcRoot<mirror::Class> */ out = *address
6163 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6164 break;
6165 }
6166 case HLoadClass::LoadKind::kRuntimeCall:
6167 case HLoadClass::LoadKind::kInvalid:
6168 LOG(FATAL) << "UNREACHABLE";
6169 UNREACHABLE();
6170 }
6171
6172 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6173 DCHECK(cls->CanCallRuntime());
6174 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
6175 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
6176 codegen_->AddSlowPath(slow_path);
6177
6178 if (generate_null_check) {
6179 __ testl(out, out);
6180 __ j(kEqual, slow_path->GetEntryLabel());
6181 }
6182
6183 if (cls->MustGenerateClinitCheck()) {
6184 GenerateClassInitializationCheck(slow_path, out);
6185 } else {
6186 __ Bind(slow_path->GetExitLabel());
6187 }
6188 }
6189 }
6190
6191 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6192 LocationSummary* locations =
6193 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6194 locations->SetInAt(0, Location::RequiresRegister());
6195 if (check->HasUses()) {
6196 locations->SetOut(Location::SameAsFirstInput());
6197 }
6198 }
6199
6200 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6201 // We assume the class is not null.
6202 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
6203 check->GetLoadClass(), check, check->GetDexPc(), true);
6204 codegen_->AddSlowPath(slow_path);
6205 GenerateClassInitializationCheck(slow_path,
6206 check->GetLocations()->InAt(0).AsRegister<Register>());
6207 }
6208
6209 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6210 SlowPathCode* slow_path, Register class_reg) {
6211 __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
6212 Immediate(mirror::Class::kStatusInitialized));
6213 __ j(kLess, slow_path->GetEntryLabel());
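// Note: the signed `kLess` above also sends erroneous classes to the slow path
// (error statuses are negative in this Status encoding), where the appropriate
// error is then thrown.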
6214 __ Bind(slow_path->GetExitLabel());
6215 // No need for memory fence, thanks to the X86 memory model.
6216 }
6217
6218 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6219 HLoadString::LoadKind desired_string_load_kind) {
6220 switch (desired_string_load_kind) {
6221 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6222 case HLoadString::LoadKind::kBssEntry:
6223 DCHECK(!Runtime::Current()->UseJitCompilation());
6224 break;
6225 case HLoadString::LoadKind::kJitTableAddress:
6226 DCHECK(Runtime::Current()->UseJitCompilation());
6227 break;
6228 case HLoadString::LoadKind::kBootImageAddress:
6229 case HLoadString::LoadKind::kRuntimeCall:
6230 break;
6231 }
6232 return desired_string_load_kind;
6233 }
6234
6235 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6236 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6237 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
6238 HLoadString::LoadKind load_kind = load->GetLoadKind();
6239 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6240 load_kind == HLoadString::LoadKind::kBssEntry) {
6241 locations->SetInAt(0, Location::RequiresRegister());
6242 }
6243 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
6244 locations->SetOut(Location::RegisterLocation(EAX));
6245 } else {
6246 locations->SetOut(Location::RequiresRegister());
6247 if (load_kind == HLoadString::LoadKind::kBssEntry) {
6248 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6249 // Rely on the pResolveString entrypoint to save everything.
6250 RegisterSet caller_saves = RegisterSet::Empty();
6251 InvokeRuntimeCallingConvention calling_convention;
6252 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6253 locations->SetCustomSlowPathCallerSaves(caller_saves);
6254 } else {
6255 // For non-Baker read barrier we have a temp-clobbering call.
6256 }
6257 }
6258 }
6259 }
6260
6261 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6262 dex::StringIndex dex_index,
6263 Handle<mirror::String> handle) {
6264 jit_string_roots_.Overwrite(
6265 StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
6266 // Add a patch entry and return the label.
6267 jit_string_patches_.emplace_back(dex_file, dex_index.index_);
6268 PatchInfo<Label>* info = &jit_string_patches_.back();
6269 return &info->label;
6270 }
6271
6272 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6273 // move.
6274 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6275 LocationSummary* locations = load->GetLocations();
6276 Location out_loc = locations->Out();
6277 Register out = out_loc.AsRegister<Register>();
6278
6279 switch (load->GetLoadKind()) {
6280 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6281 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6282 Register method_address = locations->InAt(0).AsRegister<Register>();
6283 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6284 codegen_->RecordBootStringPatch(load);
6285 return; // No dex cache slow path.
6286 }
6287 case HLoadString::LoadKind::kBootImageAddress: {
6288 uint32_t address = dchecked_integral_cast<uint32_t>(
6289 reinterpret_cast<uintptr_t>(load->GetString().Get()));
6290 DCHECK_NE(address, 0u);
6291 __ movl(out, Immediate(address));
6292 return; // No dex cache slow path.
6293 }
6294 case HLoadString::LoadKind::kBssEntry: {
6295 Register method_address = locations->InAt(0).AsRegister<Register>();
6296 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6297 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6298 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6299 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6300 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
6301 codegen_->AddSlowPath(slow_path);
6302 __ testl(out, out);
6303 __ j(kEqual, slow_path->GetEntryLabel());
6304 __ Bind(slow_path->GetExitLabel());
6305 return;
6306 }
6307 case HLoadString::LoadKind::kJitTableAddress: {
6308 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6309 Label* fixup_label = codegen_->NewJitRootStringPatch(
6310 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6311 // /* GcRoot<mirror::String> */ out = *address
6312 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6313 return;
6314 }
6315 default:
6316 break;
6317 }
6318
6319 // TODO: Re-add the compiler code to do string dex cache lookup again.
6320 InvokeRuntimeCallingConvention calling_convention;
6321 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6322 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6323 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6324 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6325 }
6326
6327 static Address GetExceptionTlsAddress() {
6328 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6329 }
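// On x86 the runtime points the FS segment base at the current Thread object, so
// fs:[offset] addressing reads thread-local state (here the pending exception)
// without consuming a core register.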
6330
6331 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6332 LocationSummary* locations =
6333 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
6334 locations->SetOut(Location::RequiresRegister());
6335 }
6336
6337 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6338 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6339 }
6340
6341 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6342 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
6343 }
6344
6345 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6346 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
6347 }
6348
6349 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
6350 LocationSummary* locations =
6351 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
6352 InvokeRuntimeCallingConvention calling_convention;
6353 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6354 }
6355
6356 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
6357 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6358 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6359 }
6360
6361 // Temp is used for read barrier.
6362 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6363 if (kEmitCompilerReadBarrier &&
6364 !kUseBakerReadBarrier &&
6365 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6366 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6367 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6368 return 1;
6369 }
6370 return 0;
6371 }
6372
6373 // The interface check uses two temps: one holds the object's iftable, the other the number
6374 // of remaining iftable entries; the candidate interface is compared directly against memory,
6375 // so no third temp is needed. The other checks have one temp for loading the object's class.
6376 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6377 if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
6378 return 2;
6379 }
6380 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6381 }
6382
6383 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
6384 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6385 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6386 bool baker_read_barrier_slow_path = false;
6387 switch (type_check_kind) {
6388 case TypeCheckKind::kExactCheck:
6389 case TypeCheckKind::kAbstractClassCheck:
6390 case TypeCheckKind::kClassHierarchyCheck:
6391 case TypeCheckKind::kArrayObjectCheck:
6392 call_kind =
6393 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6394 baker_read_barrier_slow_path = kUseBakerReadBarrier;
6395 break;
6396 case TypeCheckKind::kArrayCheck:
6397 case TypeCheckKind::kUnresolvedCheck:
6398 case TypeCheckKind::kInterfaceCheck:
6399 call_kind = LocationSummary::kCallOnSlowPath;
6400 break;
6401 }
6402
6403 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6404 if (baker_read_barrier_slow_path) {
6405 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6406 }
6407 locations->SetInAt(0, Location::RequiresRegister());
6408 locations->SetInAt(1, Location::Any());
6409 // Note that TypeCheckSlowPathX86 uses this "out" register too.
6410 locations->SetOut(Location::RequiresRegister());
6411 // When read barriers are enabled, we need a temporary register for some cases.
6412 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6413 }
6414
6415 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
6416 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6417 LocationSummary* locations = instruction->GetLocations();
6418 Location obj_loc = locations->InAt(0);
6419 Register obj = obj_loc.AsRegister<Register>();
6420 Location cls = locations->InAt(1);
6421 Location out_loc = locations->Out();
6422 Register out = out_loc.AsRegister<Register>();
6423 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6424 DCHECK_LE(num_temps, 1u);
6425 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
6426 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6427 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6428 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6429 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6430 SlowPathCode* slow_path = nullptr;
6431 NearLabel done, zero;
6432
6433 // Return 0 if `obj` is null.
6434 // Avoid null check if we know obj is not null.
6435 if (instruction->MustDoNullCheck()) {
6436 __ testl(obj, obj);
6437 __ j(kEqual, &zero);
6438 }
6439
6440 switch (type_check_kind) {
6441 case TypeCheckKind::kExactCheck: {
6442 // /* HeapReference<Class> */ out = obj->klass_
6443 GenerateReferenceLoadTwoRegisters(instruction,
6444 out_loc,
6445 obj_loc,
6446 class_offset,
6447 kCompilerReadBarrierOption);
6448 if (cls.IsRegister()) {
6449 __ cmpl(out, cls.AsRegister<Register>());
6450 } else {
6451 DCHECK(cls.IsStackSlot()) << cls;
6452 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6453 }
6454
6455 // Classes must be equal for the instanceof to succeed.
6456 __ j(kNotEqual, &zero);
6457 __ movl(out, Immediate(1));
6458 __ jmp(&done);
6459 break;
6460 }
6461
6462 case TypeCheckKind::kAbstractClassCheck: {
6463 // /* HeapReference<Class> */ out = obj->klass_
6464 GenerateReferenceLoadTwoRegisters(instruction,
6465 out_loc,
6466 obj_loc,
6467 class_offset,
6468 kCompilerReadBarrierOption);
6469 // If the class is abstract, we eagerly fetch the super class of the
6470 // object to avoid doing a comparison we know will fail.
6471 NearLabel loop;
6472 __ Bind(&loop);
6473 // /* HeapReference<Class> */ out = out->super_class_
6474 GenerateReferenceLoadOneRegister(instruction,
6475 out_loc,
6476 super_offset,
6477 maybe_temp_loc,
6478 kCompilerReadBarrierOption);
6479 __ testl(out, out);
6480 // If `out` is null, we use it for the result, and jump to `done`.
6481 __ j(kEqual, &done);
6482 if (cls.IsRegister()) {
6483 __ cmpl(out, cls.AsRegister<Register>());
6484 } else {
6485 DCHECK(cls.IsStackSlot()) << cls;
6486 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6487 }
6488 __ j(kNotEqual, &loop);
6489 __ movl(out, Immediate(1));
6490 if (zero.IsLinked()) {
6491 __ jmp(&done);
6492 }
6493 break;
6494 }
6495
6496 case TypeCheckKind::kClassHierarchyCheck: {
6497 // /* HeapReference<Class> */ out = obj->klass_
6498 GenerateReferenceLoadTwoRegisters(instruction,
6499 out_loc,
6500 obj_loc,
6501 class_offset,
6502 kCompilerReadBarrierOption);
6503 // Walk over the class hierarchy to find a match.
6504 NearLabel loop, success;
6505 __ Bind(&loop);
6506 if (cls.IsRegister()) {
6507 __ cmpl(out, cls.AsRegister<Register>());
6508 } else {
6509 DCHECK(cls.IsStackSlot()) << cls;
6510 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6511 }
6512 __ j(kEqual, &success);
6513 // /* HeapReference<Class> */ out = out->super_class_
6514 GenerateReferenceLoadOneRegister(instruction,
6515 out_loc,
6516 super_offset,
6517 maybe_temp_loc,
6518 kCompilerReadBarrierOption);
6519 __ testl(out, out);
6520 __ j(kNotEqual, &loop);
6521 // If `out` is null, we use it for the result, and jump to `done`.
6522 __ jmp(&done);
6523 __ Bind(&success);
6524 __ movl(out, Immediate(1));
6525 if (zero.IsLinked()) {
6526 __ jmp(&done);
6527 }
6528 break;
6529 }
6530
6531 case TypeCheckKind::kArrayObjectCheck: {
6532 // /* HeapReference<Class> */ out = obj->klass_
6533 GenerateReferenceLoadTwoRegisters(instruction,
6534 out_loc,
6535 obj_loc,
6536 class_offset,
6537 kCompilerReadBarrierOption);
6538 // Do an exact check.
6539 NearLabel exact_check;
6540 if (cls.IsRegister()) {
6541 __ cmpl(out, cls.AsRegister<Register>());
6542 } else {
6543 DCHECK(cls.IsStackSlot()) << cls;
6544 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6545 }
6546 __ j(kEqual, &exact_check);
6547 // Otherwise, we need to check that the object's class is a non-primitive array.
6548 // /* HeapReference<Class> */ out = out->component_type_
6549 GenerateReferenceLoadOneRegister(instruction,
6550 out_loc,
6551 component_offset,
6552 maybe_temp_loc,
6553 kCompilerReadBarrierOption);
6554 __ testl(out, out);
6555 // If `out` is null, we use it for the result, and jump to `done`.
6556 __ j(kEqual, &done);
6557 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6558 __ j(kNotEqual, &zero);
6559 __ Bind(&exact_check);
6560 __ movl(out, Immediate(1));
6561 __ jmp(&done);
6562 break;
6563 }
6564
6565 case TypeCheckKind::kArrayCheck: {
6566 // No read barrier since the slow path will retry upon failure.
6567 // /* HeapReference<Class> */ out = obj->klass_
6568 GenerateReferenceLoadTwoRegisters(instruction,
6569 out_loc,
6570 obj_loc,
6571 class_offset,
6572 kWithoutReadBarrier);
6573 if (cls.IsRegister()) {
6574 __ cmpl(out, cls.AsRegister<Register>());
6575 } else {
6576 DCHECK(cls.IsStackSlot()) << cls;
6577 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
6578 }
6579 DCHECK(locations->OnlyCallsOnSlowPath());
6580 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6581 /* is_fatal */ false);
6582 codegen_->AddSlowPath(slow_path);
6583 __ j(kNotEqual, slow_path->GetEntryLabel());
6584 __ movl(out, Immediate(1));
6585 if (zero.IsLinked()) {
6586 __ jmp(&done);
6587 }
6588 break;
6589 }
6590
6591 case TypeCheckKind::kUnresolvedCheck:
6592 case TypeCheckKind::kInterfaceCheck: {
6593 // Note that we indeed only call on slow path, but we always go
6594 // into the slow path for the unresolved and interface check
6595 // cases.
6596 //
6597 // We cannot directly call the InstanceofNonTrivial runtime
6598 // entry point without resorting to a type checking slow path
6599 // here (i.e. by calling InvokeRuntime directly), as it would
6600 // require to assign fixed registers for the inputs of this
6601 // HInstanceOf instruction (following the runtime calling
6602 // convention), which might be cluttered by the potential first
6603 // read barrier emission at the beginning of this method.
6604 //
6605 // TODO: Introduce a new runtime entry point taking the object
6606 // to test (instead of its class) as argument, and let it deal
6607 // with the read barrier issues. This will let us refactor this
6608 // case of the `switch` code as it was previously (with a direct
6609 // call to the runtime not using a type checking slow path).
6610 // This should also be beneficial for the other cases above.
6611 DCHECK(locations->OnlyCallsOnSlowPath());
6612 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6613 /* is_fatal */ false);
6614 codegen_->AddSlowPath(slow_path);
6615 __ jmp(slow_path->GetEntryLabel());
6616 if (zero.IsLinked()) {
6617 __ jmp(&done);
6618 }
6619 break;
6620 }
6621 }
6622
6623 if (zero.IsLinked()) {
6624 __ Bind(&zero);
6625 __ xorl(out, out);
6626 }
6627
6628 if (done.IsLinked()) {
6629 __ Bind(&done);
6630 }
6631
6632 if (slow_path != nullptr) {
6633 __ Bind(slow_path->GetExitLabel());
6634 }
6635 }
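// For reference, the fast path emitted above for kExactCheck is roughly (ignoring
// heap reference poisoning and read barriers):
//   mov  out, [obj + class_offset]  ; load obj->klass_
//   cmp  out, cls
//   jne  zero                       ; classes differ -> result is 0
//   mov  out, 1
//   jmp  done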
6636
6637 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
6638 switch (type_check_kind) {
6639 case TypeCheckKind::kExactCheck:
6640 case TypeCheckKind::kAbstractClassCheck:
6641 case TypeCheckKind::kClassHierarchyCheck:
6642 case TypeCheckKind::kArrayObjectCheck:
6643 return !throws_into_catch && !kEmitCompilerReadBarrier;
6644 case TypeCheckKind::kInterfaceCheck:
6645 return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
6646 case TypeCheckKind::kArrayCheck:
6647 case TypeCheckKind::kUnresolvedCheck:
6648 return false;
6649 }
6650 LOG(FATAL) << "Unreachable";
6651 UNREACHABLE();
6652 }
6653
6654 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
6655 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
6656 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6657 LocationSummary::CallKind call_kind =
6658 IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch)
6659 ? LocationSummary::kNoCall
6660 : LocationSummary::kCallOnSlowPath;
6661 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6662 locations->SetInAt(0, Location::RequiresRegister());
6663 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6664 // Require a register for the interface check since there is a loop that compares the class to
6665 // a memory address.
6666 locations->SetInAt(1, Location::RequiresRegister());
6667 } else {
6668 locations->SetInAt(1, Location::Any());
6669 }
6670 // Note that TypeCheckSlowPathX86 uses this "temp" register too.
6671 locations->AddTemp(Location::RequiresRegister());
6672 // When read barriers are enabled, we need an additional temporary register for some cases.
6673 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind) - 1);
6674 }
6675
6676 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
6677 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6678 LocationSummary* locations = instruction->GetLocations();
6679 Location obj_loc = locations->InAt(0);
6680 Register obj = obj_loc.AsRegister<Register>();
6681 Location cls = locations->InAt(1);
6682 Location temp_loc = locations->GetTemp(0);
6683 Register temp = temp_loc.AsRegister<Register>();
6684 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6685 DCHECK_GE(num_temps, 1u);
6686 DCHECK_LE(num_temps, 2u);
6687 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
6688 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6689 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6690 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6691 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6692 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6693 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6694 const uint32_t object_array_data_offset =
6695 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6696
6697 // Always false when read barriers are enabled: the read barriers avoided below (a deliberate
6698 // performance and code size trade-off) can produce false negatives, and those non-fatal
6699 // failures must be able to fall back to the runtime entrypoint instead of throwing.
6700 bool is_type_check_slow_path_fatal =
6701 IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
6702
6703 SlowPathCode* type_check_slow_path =
6704 new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
6705 is_type_check_slow_path_fatal);
6706 codegen_->AddSlowPath(type_check_slow_path);
6707
6708 NearLabel done;
6709 // Avoid null check if we know obj is not null.
6710 if (instruction->MustDoNullCheck()) {
6711 __ testl(obj, obj);
6712 __ j(kEqual, &done);
6713 }
6714
6715 switch (type_check_kind) {
6716 case TypeCheckKind::kExactCheck:
6717 case TypeCheckKind::kArrayCheck: {
6718 // /* HeapReference<Class> */ temp = obj->klass_
6719 GenerateReferenceLoadTwoRegisters(instruction,
6720 temp_loc,
6721 obj_loc,
6722 class_offset,
6723 kWithoutReadBarrier);
6724
6725 if (cls.IsRegister()) {
6726 __ cmpl(temp, cls.AsRegister<Register>());
6727 } else {
6728 DCHECK(cls.IsStackSlot()) << cls;
6729 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6730 }
6731 // Jump to slow path for throwing the exception or doing a
6732 // more involved array check.
6733 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6734 break;
6735 }
6736
6737 case TypeCheckKind::kAbstractClassCheck: {
6738 // /* HeapReference<Class> */ temp = obj->klass_
6739 GenerateReferenceLoadTwoRegisters(instruction,
6740 temp_loc,
6741 obj_loc,
6742 class_offset,
6743 kWithoutReadBarrier);
6744
6745 // If the class is abstract, we eagerly fetch the super class of the
6746 // object to avoid doing a comparison we know will fail.
6747 NearLabel loop;
6748 __ Bind(&loop);
6749 // /* HeapReference<Class> */ temp = temp->super_class_
6750 GenerateReferenceLoadOneRegister(instruction,
6751 temp_loc,
6752 super_offset,
6753 maybe_temp2_loc,
6754 kWithoutReadBarrier);
6755
6756 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6757 // exception.
6758 __ testl(temp, temp);
6759 __ j(kZero, type_check_slow_path->GetEntryLabel());
6760
6761 // Otherwise, compare the classes
6762 if (cls.IsRegister()) {
6763 __ cmpl(temp, cls.AsRegister<Register>());
6764 } else {
6765 DCHECK(cls.IsStackSlot()) << cls;
6766 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6767 }
6768 __ j(kNotEqual, &loop);
6769 break;
6770 }
6771
6772 case TypeCheckKind::kClassHierarchyCheck: {
6773 // /* HeapReference<Class> */ temp = obj->klass_
6774 GenerateReferenceLoadTwoRegisters(instruction,
6775 temp_loc,
6776 obj_loc,
6777 class_offset,
6778 kWithoutReadBarrier);
6779
6780 // Walk over the class hierarchy to find a match.
6781 NearLabel loop;
6782 __ Bind(&loop);
6783 if (cls.IsRegister()) {
6784 __ cmpl(temp, cls.AsRegister<Register>());
6785 } else {
6786 DCHECK(cls.IsStackSlot()) << cls;
6787 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6788 }
6789 __ j(kEqual, &done);
6790
6791 // /* HeapReference<Class> */ temp = temp->super_class_
6792 GenerateReferenceLoadOneRegister(instruction,
6793 temp_loc,
6794 super_offset,
6795 maybe_temp2_loc,
6796 kWithoutReadBarrier);
6797
6798 // If the class reference currently in `temp` is not null, jump
6799 // back at the beginning of the loop.
6800 __ testl(temp, temp);
6801 __ j(kNotZero, &loop);
6802 // Otherwise, jump to the slow path to throw the exception.
6803 __ jmp(type_check_slow_path->GetEntryLabel());
6804 break;
6805 }
6806
6807 case TypeCheckKind::kArrayObjectCheck: {
6808 // /* HeapReference<Class> */ temp = obj->klass_
6809 GenerateReferenceLoadTwoRegisters(instruction,
6810 temp_loc,
6811 obj_loc,
6812 class_offset,
6813 kWithoutReadBarrier);
6814
6815 // Do an exact check.
6816 if (cls.IsRegister()) {
6817 __ cmpl(temp, cls.AsRegister<Register>());
6818 } else {
6819 DCHECK(cls.IsStackSlot()) << cls;
6820 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
6821 }
6822 __ j(kEqual, &done);
6823
6824 // Otherwise, we need to check that the object's class is a non-primitive array.
6825 // /* HeapReference<Class> */ temp = temp->component_type_
6826 GenerateReferenceLoadOneRegister(instruction,
6827 temp_loc,
6828 component_offset,
6829 maybe_temp2_loc,
6830 kWithoutReadBarrier);
6831
6832 // If the component type is null (i.e. the object not an array), jump to the slow path to
6833 // throw the exception. Otherwise proceed with the check.
6834 __ testl(temp, temp);
6835 __ j(kZero, type_check_slow_path->GetEntryLabel());
6836
6837 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6838 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6839 break;
6840 }
6841
6842 case TypeCheckKind::kUnresolvedCheck:
6843 // We always go into the type check slow path for the unresolved check case.
6844 // We cannot directly call the CheckCast runtime entry point
6845 // without resorting to a type checking slow path here (i.e. by
6846 // calling InvokeRuntime directly), as it would require to
6847 // assign fixed registers for the inputs of this HInstanceOf
6848 // instruction (following the runtime calling convention), which
6849 // might be cluttered by the potential first read barrier
6850 // emission at the beginning of this method.
6851 __ jmp(type_check_slow_path->GetEntryLabel());
6852 break;
6853
6854 case TypeCheckKind::kInterfaceCheck: {
6855 // Fast path for the interface check. Since we compare with a memory location in the inner
6856 // loop we would need to have cls poisoned. However unpoisoning cls would reset the
6857 // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump
6858 // to the slow path if we are running under poisoning.
6859 if (!kPoisonHeapReferences) {
6860 // Try to avoid read barriers to improve the fast path. We can not get false positives by
6861 // doing this.
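        // The scan below walks the iftable in steps of two because entries
        // come in (interface class, method array) pairs; only the even slots
        // hold classes. Roughly (illustrative sketch):
        //
        //   int32_t i = obj->klass_->iftable_->length_;
        //   while ((i -= 2) >= 0) {
        //     if (iftable->data_[i] == cls) goto done;
        //   }
        //   goto slow_path;  // Throw ClassCastException.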
        // /* HeapReference<Class> */ temp = obj->klass_
        GenerateReferenceLoadTwoRegisters(instruction,
                                          temp_loc,
                                          obj_loc,
                                          class_offset,
                                          kWithoutReadBarrier);

        // /* HeapReference<Class> */ temp = temp->iftable_
        GenerateReferenceLoadTwoRegisters(instruction,
                                          temp_loc,
                                          temp_loc,
                                          iftable_offset,
                                          kWithoutReadBarrier);
        // Iftable is never null.
        __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
        // Loop through the iftable and check if any class matches.
        NearLabel start_loop;
        __ Bind(&start_loop);
        // Need to subtract first to handle the empty array case.
        __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
        __ j(kNegative, type_check_slow_path->GetEntryLabel());
        // Go to next interface if the classes do not match.
        __ cmpl(cls.AsRegister<Register>(),
                CodeGeneratorX86::ArrayAddress(temp,
                                               maybe_temp2_loc,
                                               TIMES_4,
                                               object_array_data_offset));
        __ j(kNotEqual, &start_loop);
      } else {
        __ jmp(type_check_slow_path->GetEntryLabel());
      }
      break;
    }
  }
  __ Bind(&done);

  __ Bind(type_check_slow_path->GetExitLabel());
}

void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
                                                 : kQuickUnlockObject,
                          instruction,
                          instruction->GetDexPc());
  if (instruction->IsEnter()) {
    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
  } else {
    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
  }
}

void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }

void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
         || instruction->GetResultType() == Primitive::kPrimLong);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::Any());
  locations->SetOut(Location::SameAsFirstInput());
}

void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
  HandleBitwiseOperation(instruction);
}

void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
  HandleBitwiseOperation(instruction);
}

void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
  HandleBitwiseOperation(instruction);
}

void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  DCHECK(first.Equals(locations->Out()));

  if (instruction->GetResultType() == Primitive::kPrimInt) {
    if (second.IsRegister()) {
      if (instruction->IsAnd()) {
        __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
      } else if (instruction->IsOr()) {
        __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
      } else {
        DCHECK(instruction->IsXor());
        __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
      }
    } else if (second.IsConstant()) {
      if (instruction->IsAnd()) {
        __ andl(first.AsRegister<Register>(),
                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
      } else if (instruction->IsOr()) {
        __ orl(first.AsRegister<Register>(),
               Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        DCHECK(instruction->IsXor());
        __ xorl(first.AsRegister<Register>(),
                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
      }
    } else {
      if (instruction->IsAnd()) {
        __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
      } else if (instruction->IsOr()) {
        __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
      } else {
        DCHECK(instruction->IsXor());
        __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
      }
    }
  } else {
    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
    if (second.IsRegisterPair()) {
      if (instruction->IsAnd()) {
        __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
        __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
      } else if (instruction->IsOr()) {
        __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
        __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
      } else {
        DCHECK(instruction->IsXor());
        __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
        __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
      }
    } else if (second.IsDoubleStackSlot()) {
      if (instruction->IsAnd()) {
        __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
        __ andl(first.AsRegisterPairHigh<Register>(),
                Address(ESP, second.GetHighStackIndex(kX86WordSize)));
      } else if (instruction->IsOr()) {
        __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
        __ orl(first.AsRegisterPairHigh<Register>(),
               Address(ESP, second.GetHighStackIndex(kX86WordSize)));
      } else {
        DCHECK(instruction->IsXor());
        __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
        __ xorl(first.AsRegisterPairHigh<Register>(),
                Address(ESP, second.GetHighStackIndex(kX86WordSize)));
      }
    } else {
      DCHECK(second.IsConstant()) << second;
      int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
      int32_t low_value = Low32Bits(value);
      int32_t high_value = High32Bits(value);
      Immediate low(low_value);
      Immediate high(high_value);
      Register first_low = first.AsRegisterPairLow<Register>();
      Register first_high = first.AsRegisterPairHigh<Register>();
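      // Handle the two 32-bit halves independently, skipping no-op halves.
      // For example (illustrative), `x & 0x00000000FFFFFFFF` emits only
      // `xorl first_high, first_high`: the low half (-1) is a no-op for AND,
      // and the high half (0) is cleared with the shorter xorl encoding.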
      if (instruction->IsAnd()) {
        if (low_value == 0) {
          __ xorl(first_low, first_low);
        } else if (low_value != -1) {
          __ andl(first_low, low);
        }
        if (high_value == 0) {
          __ xorl(first_high, first_high);
        } else if (high_value != -1) {
          __ andl(first_high, high);
        }
      } else if (instruction->IsOr()) {
        if (low_value != 0) {
          __ orl(first_low, low);
        }
        if (high_value != 0) {
          __ orl(first_high, high);
        }
      } else {
        DCHECK(instruction->IsXor());
        if (low_value != 0) {
          __ xorl(first_low, low);
        }
        if (high_value != 0) {
          __ xorl(first_high, high);
        }
      }
    }
  }
}

void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, out_reg, offset, /* needs_null_check */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      __ movl(maybe_temp.AsRegister<Register>(), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ movl(out_reg, Address(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ movl(out_reg, Address(out_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  Register out_reg = out.AsRegister<Register>();
  Register obj_reg = obj.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, /* needs_null_check */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ movl(out_reg, Address(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ movl(out_reg, Address(obj_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  Register root_reg = root.AsRegister<Register>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
          instruction, root, /* unpoison_ref_before_marking */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
      const int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
      // The entrypoint is null when the GC is not marking.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = address
      __ leal(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *address
    __ movl(root_reg, address);
    if (fixup_label != nullptr) {
      __ Bind(fixup_label);
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t offset,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Address src(obj, offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                             Location ref,
                                                             Register obj,
                                                             uint32_t data_offset,
                                                             Location index,
                                                             bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
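  // For example (illustrative values only): if kReadBarrierStateShift were 28,
  // the gray bit would live in bit 4 of byte 3 of the lock word, so the check
  // below would assemble to `testb $0x10, monitor_offset + 3(obj)`.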

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
  } else {
    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
                                               Location out,
                                               Location ref,
                                               Location obj,
                                               uint32_t offset,
                                               Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetGraph()->GetArena())
      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
                                                    Location obj,
                                                    uint32_t offset,
                                                    Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<Register>());
  }
}

void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps.
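  // Each loop iteration below handles two cases with a single cmpl. For
  // example (illustrative): lower_bound == 10 with cases {10, 11, 12} emits
  // roughly:
  //
  //   cmpl $10, value ; jl default ; je case_10
  //   cmpl $12, value ; jl case_11 ; je case_12
  //   jmp default  // unless the default block falls through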
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // There is an odd number of entries. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}

void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();

  GenPackedSwitchWithCompares(value_reg,
                              lower_bound,
                              num_entries,
                              switch_instr->GetBlock(),
                              switch_instr->GetDefaultBlock());
}

void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());

  // Constant area pointer.
  locations->SetInAt(1, Location::RequiresRegister());

  // And the temporary we need.
  locations->AddTemp(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  Register value_reg = locations->InAt(0).AsRegister<Register>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    GenPackedSwitchWithCompares(value_reg,
                                lower_bound,
                                num_entries,
                                switch_instr->GetBlock(),
                                default_block);
    return;
  }

  // Beyond the compare threshold, Optimizing dispatches through a jump table.
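  // The emitted dispatch sequence is roughly (illustrative sketch):
  //
  //   leal -lower_bound(value), temp       // remove the bias, if needed
  //   cmpl $(num_entries - 1), temp
  //   ja default
  //   movl table_start(constant_area, temp, 4), temp
  //   addl constant_area, temp             // entries are base-relative offsets
  //   jmp *temp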
  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
  Register constant_area = locations->InAt(1).AsRegister<Register>();

  // Remove the bias, if needed.
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg, -lower_bound));
    value_reg = temp_reg;
  }

  // Is the value in range?
  DCHECK_GE(num_entries, 1u);
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load (target-constant_area) from the jump table, indexing by the value.
  __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));

  // Compute the actual target address by adding in constant_area.
  __ addl(temp_reg, constant_area);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
    HX86ComputeBaseMethodAddress* insn) {
  LocationSummary* locations = insn->GetLocations();
  Register reg = locations->Out().AsRegister<Register>();

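  // 32-bit x86 has no PC-relative addressing for data, so we use the classic
  // PIC idiom: a call to the very next instruction pushes that instruction's
  // address on the stack, and popping it materializes the address in `reg`.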
  // Generate call to next instruction.
  Label next_instruction;
  __ call(&next_instruction);
  __ Bind(&next_instruction);

  // Remember this offset for later use with constant area.
  codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());

  // Grab the return address off the stack.
  __ popl(reg);
}

void LocationsBuilderX86::VisitX86LoadFromConstantTable(
    HX86LoadFromConstantTable* insn) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));

  // If the constant doesn't need to be materialized, we only need the inputs to be set.
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  switch (insn->GetType()) {
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetOut(Location::RequiresFpuRegister());
      break;

    case Primitive::kPrimInt:
      locations->SetOut(Location::RequiresRegister());
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
  if (insn->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = insn->GetLocations();
  Location out = locations->Out();
  Register const_area = locations->InAt(0).AsRegister<Register>();
  HConstant* value = insn->GetConstant();

  switch (insn->GetType()) {
    case Primitive::kPrimFloat:
      __ movss(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralFloatAddress(
                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case Primitive::kPrimDouble:
      __ movsd(out.AsFpuRegister<XmmRegister>(),
               codegen_->LiteralDoubleAddress(
                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    case Primitive::kPrimInt:
      __ movl(out.AsRegister<Register>(),
              codegen_->LiteralInt32Address(
                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
      break;

    default:
      LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86& codegen,
           HX86ComputeBaseMethodAddress* base_method_address,
           size_t offset)
      : codegen_(&codegen),
        base_method_address_(base_method_address),
        offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86* codegen_;
  HX86ComputeBaseMethodAddress* base_method_address_;

 private:
  void Process(const MemoryRegion& region, int pos) OVERRIDE {
    // Patch the correct offset for the instruction. The place to patch is the
    // last 4 bytes of the instruction.
    // The value to patch is the distance from the offset in the constant area
    // to the address computed by the HX86ComputeBaseMethodAddress instruction.
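    // Worked example (illustrative numbers): if the constant area starts at
    // code offset 0x100, this datum sits 8 bytes into it, and the base address
    // was recorded at code offset 0x20, the patched displacement is
    // (0x100 + 8) - 0x20 = 0xE8.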
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position =
        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  int32_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
        switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // The label values in the jump table are computed relative to the
    // instruction addressing the constant area.
    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
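    // For example (illustrative numbers): a case block whose label is bound at
    // code offset 0x80, with the base address recorded at offset 0x20, gets
    // the table entry 0x60; the dispatch code adds the materialized base
    // address back to recover the absolute target.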

    // Populate the jump table with the base-relative offset of each case target.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - relative_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HX86PackedSwitch* switch_instr_;
};

void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
    // byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86::LiteralDoubleAddress(double v,
                                               HX86ComputeBaseMethodAddress* method_base,
                                               Register reg) {
  AssemblerFixup* fixup =
      new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddDouble(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralFloatAddress(float v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddFloat(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt32(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
                                              HX86ComputeBaseMethodAddress* method_base,
                                              Register reg) {
  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt64(v));
  return Address(reg, kDummy32BitOffset, fixup);
}

void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
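  // An xorl of a register with itself is the idiomatic way to zero it: the
  // encoding is shorter than movl with a zero immediate, and it breaks any
  // dependency on the previous value of `dest`.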
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
  Register lhs_reg = lhs.AsRegister<Register>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<Register>());
  }
}

Address CodeGeneratorX86::ArrayAddress(Register obj,
                                       Location index,
                                       ScaleFactor scale,
                                       uint32_t data_offset) {
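  // A constant index folds into a plain displacement, e.g. (illustrative)
  // index 3 with TIMES_4 and data_offset 12 yields Address(obj, 24); a
  // register index uses x86 scaled-index addressing instead.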
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<Register>(), scale, data_offset);
}

Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
                                           Register reg,
                                           Register value) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);

  // We want a scaled address, as we are extracting the correct offset from the table.
  return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
}

// TODO: target as memory.
void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
  if (!target.IsValid()) {
    DCHECK_EQ(type, Primitive::kPrimVoid);
    return;
  }

  DCHECK_NE(type, Primitive::kPrimVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
  if (target.Equals(return_loc)) {
    return;
  }

  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
  // with the else branch.
  if (type == Primitive::kPrimLong) {
    HParallelMove parallel_move(GetGraph()->GetArena());
    parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), Primitive::kPrimInt, nullptr);
    parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), Primitive::kPrimInt, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  } else {
    // Let the parallel move resolver take care of all of this.
    HParallelMove parallel_move(GetGraph()->GetArena());
    parallel_move.AddMove(return_loc, target, type, nullptr);
    GetMoveResolver()->EmitNativeCode(&parallel_move);
  }
}

void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
                                       const uint8_t* roots_data,
                                       const PatchInfo<Label>& info,
                                       uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    const auto it = jit_string_roots_.find(
        StringReference(&info.dex_file, dex::StringIndex(info.index)));
    DCHECK(it != jit_string_roots_.end());
    uint64_t index_in_table = it->second;
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    const auto it = jit_class_roots_.find(
        TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
    DCHECK(it != jit_class_roots_.end());
    uint64_t index_in_table = it->second;
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

#undef __

}  // namespace x86
}  // namespace art