1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_x86.h"
29 #include "linker/linker_patch.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/class-inl.h"
33 #include "thread.h"
34 #include "utils/assembler.h"
35 #include "utils/stack_checks.h"
36 #include "utils/x86/assembler_x86.h"
37 #include "utils/x86/managed_register_x86.h"
38
39 namespace art {
40
// Forward declaration of the GcRoot class template.
template <class MirrorType> class GcRoot;
43
44 namespace x86 {
45
46 static constexpr int kCurrentMethodStackOffset = 0;
47 static constexpr Register kMethodRegisterArgument = EAX;
48 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
49
50 static constexpr int kC2ConditionMask = 0x400;
51
52 static constexpr int kFakeReturnRegister = Register(8);
53
// `__` forwards to the X86Assembler of whatever `codegen` variable is in scope at the use site.
// The NOLINT marker suppresses a wrong clang-tidy warning/fix (misc-macro-parentheses).
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
// Expands to the Int32Value() of QUICK_ENTRYPOINT_OFFSET for entrypoint `x` with kX86PointerSize.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
57
58 class NullCheckSlowPathX86 : public SlowPathCode {
59 public:
NullCheckSlowPathX86(HNullCheck * instruction)60 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
61
EmitNativeCode(CodeGenerator * codegen)62 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
63 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
64 __ Bind(GetEntryLabel());
65 if (instruction_->CanThrowIntoCatchBlock()) {
66 // Live registers will be restored in the catch block if caught.
67 SaveLiveRegisters(codegen, instruction_->GetLocations());
68 }
69 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
70 instruction_,
71 instruction_->GetDexPc(),
72 this);
73 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
74 }
75
IsFatal() const76 bool IsFatal() const OVERRIDE { return true; }
77
GetDescription() const78 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }
79
80 private:
81 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
82 };
83
84 class DivZeroCheckSlowPathX86 : public SlowPathCode {
85 public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)86 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
87
EmitNativeCode(CodeGenerator * codegen)88 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
89 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
90 __ Bind(GetEntryLabel());
91 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
92 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
93 }
94
IsFatal() const95 bool IsFatal() const OVERRIDE { return true; }
96
GetDescription() const97 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }
98
99 private:
100 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
101 };
102
103 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
104 public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)105 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
106 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
107
EmitNativeCode(CodeGenerator * codegen)108 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
109 __ Bind(GetEntryLabel());
110 if (is_div_) {
111 __ negl(reg_);
112 } else {
113 __ movl(reg_, Immediate(0));
114 }
115 __ jmp(GetExitLabel());
116 }
117
GetDescription() const118 const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; }
119
120 private:
121 Register reg_;
122 bool is_div_;
123 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
124 };
125
126 class BoundsCheckSlowPathX86 : public SlowPathCode {
127 public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)128 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
129
EmitNativeCode(CodeGenerator * codegen)130 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
131 LocationSummary* locations = instruction_->GetLocations();
132 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
133 __ Bind(GetEntryLabel());
134 // We're moving two locations to locations that could overlap, so we need a parallel
135 // move resolver.
136 if (instruction_->CanThrowIntoCatchBlock()) {
137 // Live registers will be restored in the catch block if caught.
138 SaveLiveRegisters(codegen, instruction_->GetLocations());
139 }
140
141 // Are we using an array length from memory?
142 HInstruction* array_length = instruction_->InputAt(1);
143 Location length_loc = locations->InAt(1);
144 InvokeRuntimeCallingConvention calling_convention;
145 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
146 // Load the array length into our temporary.
147 HArrayLength* length = array_length->AsArrayLength();
148 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
149 Location array_loc = array_length->GetLocations()->InAt(0);
150 Address array_len(array_loc.AsRegister<Register>(), len_offset);
151 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
152 // Check for conflicts with index.
153 if (length_loc.Equals(locations->InAt(0))) {
154 // We know we aren't using parameter 2.
155 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
156 }
157 __ movl(length_loc.AsRegister<Register>(), array_len);
158 if (mirror::kUseStringCompression && length->IsStringLength()) {
159 __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
160 }
161 }
162 x86_codegen->EmitParallelMoves(
163 locations->InAt(0),
164 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
165 DataType::Type::kInt32,
166 length_loc,
167 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
168 DataType::Type::kInt32);
169 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
170 ? kQuickThrowStringBounds
171 : kQuickThrowArrayBounds;
172 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
173 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
174 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
175 }
176
IsFatal() const177 bool IsFatal() const OVERRIDE { return true; }
178
GetDescription() const179 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }
180
181 private:
182 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
183 };
184
185 class SuspendCheckSlowPathX86 : public SlowPathCode {
186 public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)187 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
188 : SlowPathCode(instruction), successor_(successor) {}
189
EmitNativeCode(CodeGenerator * codegen)190 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
191 LocationSummary* locations = instruction_->GetLocations();
192 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
193 __ Bind(GetEntryLabel());
194 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
195 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
196 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
197 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
198 if (successor_ == nullptr) {
199 __ jmp(GetReturnLabel());
200 } else {
201 __ jmp(x86_codegen->GetLabelOf(successor_));
202 }
203 }
204
GetReturnLabel()205 Label* GetReturnLabel() {
206 DCHECK(successor_ == nullptr);
207 return &return_label_;
208 }
209
GetSuccessor() const210 HBasicBlock* GetSuccessor() const {
211 return successor_;
212 }
213
GetDescription() const214 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }
215
216 private:
217 HBasicBlock* const successor_;
218 Label return_label_;
219
220 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
221 };
222
223 class LoadStringSlowPathX86 : public SlowPathCode {
224 public:
LoadStringSlowPathX86(HLoadString * instruction)225 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
226
EmitNativeCode(CodeGenerator * codegen)227 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
228 LocationSummary* locations = instruction_->GetLocations();
229 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
230
231 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
232 __ Bind(GetEntryLabel());
233 SaveLiveRegisters(codegen, locations);
234
235 InvokeRuntimeCallingConvention calling_convention;
236 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
237 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
238 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
239 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
240 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
241 RestoreLiveRegisters(codegen, locations);
242
243 __ jmp(GetExitLabel());
244 }
245
GetDescription() const246 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }
247
248 private:
249 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
250 };
251
252 class LoadClassSlowPathX86 : public SlowPathCode {
253 public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at,uint32_t dex_pc,bool do_clinit)254 LoadClassSlowPathX86(HLoadClass* cls,
255 HInstruction* at,
256 uint32_t dex_pc,
257 bool do_clinit)
258 : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
259 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
260 }
261
EmitNativeCode(CodeGenerator * codegen)262 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
263 LocationSummary* locations = instruction_->GetLocations();
264 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
265 __ Bind(GetEntryLabel());
266 SaveLiveRegisters(codegen, locations);
267
268 InvokeRuntimeCallingConvention calling_convention;
269 dex::TypeIndex type_index = cls_->GetTypeIndex();
270 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
271 x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
272 : kQuickInitializeType,
273 instruction_,
274 dex_pc_,
275 this);
276 if (do_clinit_) {
277 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
278 } else {
279 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
280 }
281
282 // Move the class to the desired location.
283 Location out = locations->Out();
284 if (out.IsValid()) {
285 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
286 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
287 }
288 RestoreLiveRegisters(codegen, locations);
289 __ jmp(GetExitLabel());
290 }
291
GetDescription() const292 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; }
293
294 private:
295 // The class this slow path will load.
296 HLoadClass* const cls_;
297
298 // The dex PC of `at_`.
299 const uint32_t dex_pc_;
300
301 // Whether to initialize the class.
302 const bool do_clinit_;
303
304 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
305 };
306
307 class TypeCheckSlowPathX86 : public SlowPathCode {
308 public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)309 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
310 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
311
EmitNativeCode(CodeGenerator * codegen)312 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
313 LocationSummary* locations = instruction_->GetLocations();
314 DCHECK(instruction_->IsCheckCast()
315 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
316
317 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
318 __ Bind(GetEntryLabel());
319
320 if (kPoisonHeapReferences &&
321 instruction_->IsCheckCast() &&
322 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
323 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
324 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
325 }
326
327 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
328 SaveLiveRegisters(codegen, locations);
329 }
330
331 // We're moving two locations to locations that could overlap, so we need a parallel
332 // move resolver.
333 InvokeRuntimeCallingConvention calling_convention;
334 x86_codegen->EmitParallelMoves(locations->InAt(0),
335 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
336 DataType::Type::kReference,
337 locations->InAt(1),
338 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
339 DataType::Type::kReference);
340 if (instruction_->IsInstanceOf()) {
341 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
342 instruction_,
343 instruction_->GetDexPc(),
344 this);
345 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
346 } else {
347 DCHECK(instruction_->IsCheckCast());
348 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
349 instruction_,
350 instruction_->GetDexPc(),
351 this);
352 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
353 }
354
355 if (!is_fatal_) {
356 if (instruction_->IsInstanceOf()) {
357 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
358 }
359 RestoreLiveRegisters(codegen, locations);
360
361 __ jmp(GetExitLabel());
362 }
363 }
364
GetDescription() const365 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; }
IsFatal() const366 bool IsFatal() const OVERRIDE { return is_fatal_; }
367
368 private:
369 const bool is_fatal_;
370
371 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
372 };
373
374 class DeoptimizationSlowPathX86 : public SlowPathCode {
375 public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)376 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
377 : SlowPathCode(instruction) {}
378
EmitNativeCode(CodeGenerator * codegen)379 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
380 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
381 __ Bind(GetEntryLabel());
382 LocationSummary* locations = instruction_->GetLocations();
383 SaveLiveRegisters(codegen, locations);
384 InvokeRuntimeCallingConvention calling_convention;
385 x86_codegen->Load32BitValue(
386 calling_convention.GetRegisterAt(0),
387 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
388 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
389 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
390 }
391
GetDescription() const392 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
393
394 private:
395 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
396 };
397
398 class ArraySetSlowPathX86 : public SlowPathCode {
399 public:
ArraySetSlowPathX86(HInstruction * instruction)400 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
401
EmitNativeCode(CodeGenerator * codegen)402 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
403 LocationSummary* locations = instruction_->GetLocations();
404 __ Bind(GetEntryLabel());
405 SaveLiveRegisters(codegen, locations);
406
407 InvokeRuntimeCallingConvention calling_convention;
408 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
409 parallel_move.AddMove(
410 locations->InAt(0),
411 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
412 DataType::Type::kReference,
413 nullptr);
414 parallel_move.AddMove(
415 locations->InAt(1),
416 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
417 DataType::Type::kInt32,
418 nullptr);
419 parallel_move.AddMove(
420 locations->InAt(2),
421 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
422 DataType::Type::kReference,
423 nullptr);
424 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
425
426 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
427 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
428 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
429 RestoreLiveRegisters(codegen, locations);
430 __ jmp(GetExitLabel());
431 }
432
GetDescription() const433 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
434
435 private:
436 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
437 };
438
439 // Slow path marking an object reference `ref` during a read
440 // barrier. The field `obj.field` in the object `obj` holding this
441 // reference does not get updated by this slow path after marking (see
442 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
443 //
444 // This means that after the execution of this slow path, `ref` will
445 // always be up-to-date, but `obj.field` may not; i.e., after the
446 // flip, `ref` will be a to-space reference, but `obj.field` will
447 // probably still be a from-space reference (unless it gets updated by
448 // another thread, or if another thread installed another object
449 // reference (different from `ref`) in `obj.field`).
450 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
451 public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)452 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
453 Location ref,
454 bool unpoison_ref_before_marking)
455 : SlowPathCode(instruction),
456 ref_(ref),
457 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
458 DCHECK(kEmitCompilerReadBarrier);
459 }
460
GetDescription() const461 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
462
EmitNativeCode(CodeGenerator * codegen)463 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
464 LocationSummary* locations = instruction_->GetLocations();
465 Register ref_reg = ref_.AsRegister<Register>();
466 DCHECK(locations->CanCall());
467 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
468 DCHECK(instruction_->IsInstanceFieldGet() ||
469 instruction_->IsStaticFieldGet() ||
470 instruction_->IsArrayGet() ||
471 instruction_->IsArraySet() ||
472 instruction_->IsLoadClass() ||
473 instruction_->IsLoadString() ||
474 instruction_->IsInstanceOf() ||
475 instruction_->IsCheckCast() ||
476 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
477 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
478 << "Unexpected instruction in read barrier marking slow path: "
479 << instruction_->DebugName();
480
481 __ Bind(GetEntryLabel());
482 if (unpoison_ref_before_marking_) {
483 // Object* ref = ref_addr->AsMirrorPtr()
484 __ MaybeUnpoisonHeapReference(ref_reg);
485 }
486 // No need to save live registers; it's taken care of by the
487 // entrypoint. Also, there is no need to update the stack mask,
488 // as this runtime call will not trigger a garbage collection.
489 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
490 DCHECK_NE(ref_reg, ESP);
491 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
492 // "Compact" slow path, saving two moves.
493 //
494 // Instead of using the standard runtime calling convention (input
495 // and output in EAX):
496 //
497 // EAX <- ref
498 // EAX <- ReadBarrierMark(EAX)
499 // ref <- EAX
500 //
501 // we just use rX (the register containing `ref`) as input and output
502 // of a dedicated entrypoint:
503 //
504 // rX <- ReadBarrierMarkRegX(rX)
505 //
506 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
507 // This runtime call does not require a stack map.
508 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
509 __ jmp(GetExitLabel());
510 }
511
512 private:
513 // The location (register) of the marked object reference.
514 const Location ref_;
515 // Should the reference in `ref_` be unpoisoned prior to marking it?
516 const bool unpoison_ref_before_marking_;
517
518 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
519 };
520
521 // Slow path marking an object reference `ref` during a read barrier,
522 // and if needed, atomically updating the field `obj.field` in the
523 // object `obj` holding this reference after marking (contrary to
524 // ReadBarrierMarkSlowPathX86 above, which never tries to update
525 // `obj.field`).
526 //
527 // This means that after the execution of this slow path, both `ref`
528 // and `obj.field` will be up-to-date; i.e., after the flip, both will
529 // hold the same to-space reference (unless another thread installed
530 // another object reference (different from `ref`) in `obj.field`).
531 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
532 public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)533 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
534 Location ref,
535 Register obj,
536 const Address& field_addr,
537 bool unpoison_ref_before_marking,
538 Register temp)
539 : SlowPathCode(instruction),
540 ref_(ref),
541 obj_(obj),
542 field_addr_(field_addr),
543 unpoison_ref_before_marking_(unpoison_ref_before_marking),
544 temp_(temp) {
545 DCHECK(kEmitCompilerReadBarrier);
546 }
547
GetDescription() const548 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
549
EmitNativeCode(CodeGenerator * codegen)550 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
551 LocationSummary* locations = instruction_->GetLocations();
552 Register ref_reg = ref_.AsRegister<Register>();
553 DCHECK(locations->CanCall());
554 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
555 // This slow path is only used by the UnsafeCASObject intrinsic.
556 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
557 << "Unexpected instruction in read barrier marking and field updating slow path: "
558 << instruction_->DebugName();
559 DCHECK(instruction_->GetLocations()->Intrinsified());
560 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
561
562 __ Bind(GetEntryLabel());
563 if (unpoison_ref_before_marking_) {
564 // Object* ref = ref_addr->AsMirrorPtr()
565 __ MaybeUnpoisonHeapReference(ref_reg);
566 }
567
568 // Save the old (unpoisoned) reference.
569 __ movl(temp_, ref_reg);
570
571 // No need to save live registers; it's taken care of by the
572 // entrypoint. Also, there is no need to update the stack mask,
573 // as this runtime call will not trigger a garbage collection.
574 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
575 DCHECK_NE(ref_reg, ESP);
576 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
577 // "Compact" slow path, saving two moves.
578 //
579 // Instead of using the standard runtime calling convention (input
580 // and output in EAX):
581 //
582 // EAX <- ref
583 // EAX <- ReadBarrierMark(EAX)
584 // ref <- EAX
585 //
586 // we just use rX (the register containing `ref`) as input and output
587 // of a dedicated entrypoint:
588 //
589 // rX <- ReadBarrierMarkRegX(rX)
590 //
591 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
592 // This runtime call does not require a stack map.
593 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
594
595 // If the new reference is different from the old reference,
596 // update the field in the holder (`*field_addr`).
597 //
598 // Note that this field could also hold a different object, if
599 // another thread had concurrently changed it. In that case, the
600 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
601 // operation below would abort the CAS, leaving the field as-is.
602 NearLabel done;
603 __ cmpl(temp_, ref_reg);
604 __ j(kEqual, &done);
605
606 // Update the the holder's field atomically. This may fail if
607 // mutator updates before us, but it's OK. This is achieved
608 // using a strong compare-and-set (CAS) operation with relaxed
609 // memory synchronization ordering, where the expected value is
610 // the old reference and the desired value is the new reference.
611 // This operation is implemented with a 32-bit LOCK CMPXLCHG
612 // instruction, which requires the expected value (the old
613 // reference) to be in EAX. Save EAX beforehand, and move the
614 // expected value (stored in `temp_`) into EAX.
615 __ pushl(EAX);
616 __ movl(EAX, temp_);
617
618 // Convenience aliases.
619 Register base = obj_;
620 Register expected = EAX;
621 Register value = ref_reg;
622
623 bool base_equals_value = (base == value);
624 if (kPoisonHeapReferences) {
625 if (base_equals_value) {
626 // If `base` and `value` are the same register location, move
627 // `value` to a temporary register. This way, poisoning
628 // `value` won't invalidate `base`.
629 value = temp_;
630 __ movl(value, base);
631 }
632
633 // Check that the register allocator did not assign the location
634 // of `expected` (EAX) to `value` nor to `base`, so that heap
635 // poisoning (when enabled) works as intended below.
636 // - If `value` were equal to `expected`, both references would
637 // be poisoned twice, meaning they would not be poisoned at
638 // all, as heap poisoning uses address negation.
639 // - If `base` were equal to `expected`, poisoning `expected`
640 // would invalidate `base`.
641 DCHECK_NE(value, expected);
642 DCHECK_NE(base, expected);
643
644 __ PoisonHeapReference(expected);
645 __ PoisonHeapReference(value);
646 }
647
648 __ LockCmpxchgl(field_addr_, value);
649
650 // If heap poisoning is enabled, we need to unpoison the values
651 // that were poisoned earlier.
652 if (kPoisonHeapReferences) {
653 if (base_equals_value) {
654 // `value` has been moved to a temporary register, no need
655 // to unpoison it.
656 } else {
657 __ UnpoisonHeapReference(value);
658 }
659 // No need to unpoison `expected` (EAX), as it is be overwritten below.
660 }
661
662 // Restore EAX.
663 __ popl(EAX);
664
665 __ Bind(&done);
666 __ jmp(GetExitLabel());
667 }
668
669 private:
670 // The location (register) of the marked object reference.
671 const Location ref_;
672 // The register containing the object holding the marked object reference field.
673 const Register obj_;
674 // The address of the marked reference field. The base of this address must be `obj_`.
675 const Address field_addr_;
676
677 // Should the reference in `ref_` be unpoisoned prior to marking it?
678 const bool unpoison_ref_before_marking_;
679
680 const Register temp_;
681
682 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
683 };
684
685 // Slow path generating a read barrier for a heap reference.
686 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
687 public:
ReadBarrierForHeapReferenceSlowPathX86(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)688 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
689 Location out,
690 Location ref,
691 Location obj,
692 uint32_t offset,
693 Location index)
694 : SlowPathCode(instruction),
695 out_(out),
696 ref_(ref),
697 obj_(obj),
698 offset_(offset),
699 index_(index) {
700 DCHECK(kEmitCompilerReadBarrier);
701 // If `obj` is equal to `out` or `ref`, it means the initial object
702 // has been overwritten by (or after) the heap object reference load
703 // to be instrumented, e.g.:
704 //
705 // __ movl(out, Address(out, offset));
706 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
707 //
708 // In that case, we have lost the information about the original
709 // object, and the emitted read barrier cannot work properly.
710 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
711 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
712 }
713
EmitNativeCode(CodeGenerator * codegen)714 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
715 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
716 LocationSummary* locations = instruction_->GetLocations();
717 Register reg_out = out_.AsRegister<Register>();
718 DCHECK(locations->CanCall());
719 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
720 DCHECK(instruction_->IsInstanceFieldGet() ||
721 instruction_->IsStaticFieldGet() ||
722 instruction_->IsArrayGet() ||
723 instruction_->IsInstanceOf() ||
724 instruction_->IsCheckCast() ||
725 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
726 << "Unexpected instruction in read barrier for heap reference slow path: "
727 << instruction_->DebugName();
728
729 __ Bind(GetEntryLabel());
730 SaveLiveRegisters(codegen, locations);
731
732 // We may have to change the index's value, but as `index_` is a
733 // constant member (like other "inputs" of this slow path),
734 // introduce a copy of it, `index`.
735 Location index = index_;
736 if (index_.IsValid()) {
737 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
738 if (instruction_->IsArrayGet()) {
739 // Compute the actual memory offset and store it in `index`.
740 Register index_reg = index_.AsRegister<Register>();
741 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
742 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
743 // We are about to change the value of `index_reg` (see the
744 // calls to art::x86::X86Assembler::shll and
745 // art::x86::X86Assembler::AddImmediate below), but it has
746 // not been saved by the previous call to
747 // art::SlowPathCode::SaveLiveRegisters, as it is a
748 // callee-save register --
749 // art::SlowPathCode::SaveLiveRegisters does not consider
750 // callee-save registers, as it has been designed with the
751 // assumption that callee-save registers are supposed to be
752 // handled by the called function. So, as a callee-save
753 // register, `index_reg` _would_ eventually be saved onto
754 // the stack, but it would be too late: we would have
755 // changed its value earlier. Therefore, we manually save
756 // it here into another freely available register,
757 // `free_reg`, chosen of course among the caller-save
758 // registers (as a callee-save `free_reg` register would
759 // exhibit the same problem).
760 //
761 // Note we could have requested a temporary register from
762 // the register allocator instead; but we prefer not to, as
763 // this is a slow path, and we know we can find a
764 // caller-save register that is available.
765 Register free_reg = FindAvailableCallerSaveRegister(codegen);
766 __ movl(free_reg, index_reg);
767 index_reg = free_reg;
768 index = Location::RegisterLocation(index_reg);
769 } else {
770 // The initial register stored in `index_` has already been
771 // saved in the call to art::SlowPathCode::SaveLiveRegisters
772 // (as it is not a callee-save register), so we can freely
773 // use it.
774 }
775 // Shifting the index value contained in `index_reg` by the scale
776 // factor (2) cannot overflow in practice, as the runtime is
777 // unable to allocate object arrays with a size larger than
778 // 2^26 - 1 (that is, 2^28 - 4 bytes).
779 __ shll(index_reg, Immediate(TIMES_4));
780 static_assert(
781 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
782 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
783 __ AddImmediate(index_reg, Immediate(offset_));
784 } else {
785 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
786 // intrinsics, `index_` is not shifted by a scale factor of 2
787 // (as in the case of ArrayGet), as it is actually an offset
788 // to an object field within an object.
789 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
790 DCHECK(instruction_->GetLocations()->Intrinsified());
791 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
792 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
793 << instruction_->AsInvoke()->GetIntrinsic();
794 DCHECK_EQ(offset_, 0U);
795 DCHECK(index_.IsRegisterPair());
796 // UnsafeGet's offset location is a register pair, the low
797 // part contains the correct offset.
798 index = index_.ToLow();
799 }
800 }
801
802 // We're moving two or three locations to locations that could
803 // overlap, so we need a parallel move resolver.
804 InvokeRuntimeCallingConvention calling_convention;
805 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
806 parallel_move.AddMove(ref_,
807 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
808 DataType::Type::kReference,
809 nullptr);
810 parallel_move.AddMove(obj_,
811 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
812 DataType::Type::kReference,
813 nullptr);
814 if (index.IsValid()) {
815 parallel_move.AddMove(index,
816 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
817 DataType::Type::kInt32,
818 nullptr);
819 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
820 } else {
821 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
822 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
823 }
824 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
825 CheckEntrypointTypes<
826 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
827 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
828
829 RestoreLiveRegisters(codegen, locations);
830 __ jmp(GetExitLabel());
831 }
832
GetDescription() const833 const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
834
835 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)836 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
837 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
838 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
839 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
840 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
841 return static_cast<Register>(i);
842 }
843 }
844 // We shall never fail to find a free caller-save register, as
845 // there are more than two core caller-save registers on x86
846 // (meaning it is possible to find one which is different from
847 // `ref` and `obj`).
848 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
849 LOG(FATAL) << "Could not find a free caller-save register";
850 UNREACHABLE();
851 }
852
853 const Location out_;
854 const Location ref_;
855 const Location obj_;
856 const uint32_t offset_;
857 // An additional location containing an index to an array.
858 // Only used for HArrayGet and the UnsafeGetObject &
859 // UnsafeGetObjectVolatile intrinsics.
860 const Location index_;
861
862 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
863 };
864
865 // Slow path generating a read barrier for a GC root.
866 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
867 public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)868 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
869 : SlowPathCode(instruction), out_(out), root_(root) {
870 DCHECK(kEmitCompilerReadBarrier);
871 }
872
EmitNativeCode(CodeGenerator * codegen)873 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
874 LocationSummary* locations = instruction_->GetLocations();
875 Register reg_out = out_.AsRegister<Register>();
876 DCHECK(locations->CanCall());
877 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
878 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
879 << "Unexpected instruction in read barrier for GC root slow path: "
880 << instruction_->DebugName();
881
882 __ Bind(GetEntryLabel());
883 SaveLiveRegisters(codegen, locations);
884
885 InvokeRuntimeCallingConvention calling_convention;
886 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
887 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
888 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
889 instruction_,
890 instruction_->GetDexPc(),
891 this);
892 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
893 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
894
895 RestoreLiveRegisters(codegen, locations);
896 __ jmp(GetExitLabel());
897 }
898
GetDescription() const899 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }
900
901 private:
902 const Location out_;
903 const Location root_;
904
905 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
906 };
907
908 #undef __
909 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
910 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
911
X86Condition(IfCondition cond)912 inline Condition X86Condition(IfCondition cond) {
913 switch (cond) {
914 case kCondEQ: return kEqual;
915 case kCondNE: return kNotEqual;
916 case kCondLT: return kLess;
917 case kCondLE: return kLessEqual;
918 case kCondGT: return kGreater;
919 case kCondGE: return kGreaterEqual;
920 case kCondB: return kBelow;
921 case kCondBE: return kBelowEqual;
922 case kCondA: return kAbove;
923 case kCondAE: return kAboveEqual;
924 }
925 LOG(FATAL) << "Unreachable";
926 UNREACHABLE();
927 }
928
929 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)930 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
931 switch (cond) {
932 case kCondEQ: return kEqual;
933 case kCondNE: return kNotEqual;
934 // Signed to unsigned, and FP to x86 name.
935 case kCondLT: return kBelow;
936 case kCondLE: return kBelowEqual;
937 case kCondGT: return kAbove;
938 case kCondGE: return kAboveEqual;
939 // Unsigned remain unchanged.
940 case kCondB: return kBelow;
941 case kCondBE: return kBelowEqual;
942 case kCondA: return kAbove;
943 case kCondAE: return kAboveEqual;
944 }
945 LOG(FATAL) << "Unreachable";
946 UNREACHABLE();
947 }
948
DumpCoreRegister(std::ostream & stream,int reg) const949 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
950 stream << Register(reg);
951 }
952
DumpFloatingPointRegister(std::ostream & stream,int reg) const953 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
954 stream << XmmRegister(reg);
955 }
956
SaveCoreRegister(size_t stack_index,uint32_t reg_id)957 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
958 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
959 return kX86WordSize;
960 }
961
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)962 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
963 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
964 return kX86WordSize;
965 }
966
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)967 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
968 if (GetGraph()->HasSIMD()) {
969 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
970 } else {
971 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
972 }
973 return GetFloatingPointSpillSlotSize();
974 }
975
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)976 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
977 if (GetGraph()->HasSIMD()) {
978 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
979 } else {
980 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
981 }
982 return GetFloatingPointSpillSlotSize();
983 }
984
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)985 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
986 HInstruction* instruction,
987 uint32_t dex_pc,
988 SlowPathCode* slow_path) {
989 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
990 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
991 if (EntrypointRequiresStackMap(entrypoint)) {
992 RecordPcInfo(instruction, dex_pc, slow_path);
993 }
994 }
995
// Validates the request and emits a call to the runtime entrypoint located at
// `entry_point_offset`. Unlike InvokeRuntime, this never calls RecordPcInfo.
void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}
1002
GenerateInvokeRuntime(int32_t entry_point_offset)1003 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1004 __ fs()->call(Address::Absolute(entry_point_offset));
1005 }
1006
// Builds the x86 code generator for `graph`.
//
// The base CodeGenerator is given the x86 register counts and a core
// callee-save mask made of kCoreCalleeSaves plus the bit of
// kFakeReturnRegister. All patch containers and the jump-table fixup list are
// backed by the graph's arena allocator (kArenaAllocCodeGenerator).
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const X86InstructionSetFeatures& isa_features,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    // Core callee-save mask, including the fake return register.
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    // NOTE(review): presumably the floating-point callee-save mask
                    // (none on x86) -- confirm against CodeGenerator's constructor.
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      isa_features_(isa_features),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      // -1 until a constant area start is assigned elsewhere.
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
1042
// Marks the core registers that must be treated as blocked. On x86 only ESP is
// blocked here.
void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}
1047
// Builds the instruction visitor for `graph`, caching the assembler obtained
// from `codegen` together with the code generator itself.
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}
1052
DWARFReg(Register reg)1053 static dwarf::Reg DWARFReg(Register reg) {
1054 return dwarf::Reg::X86Core(static_cast<int>(reg));
1055 }
1056
GenerateFrameEntry()1057 void CodeGeneratorX86::GenerateFrameEntry() {
1058 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1059 __ Bind(&frame_entry_label_);
1060 bool skip_overflow_check =
1061 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1062 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1063
1064 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1065 __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()),
1066 Immediate(1));
1067 }
1068
1069 if (!skip_overflow_check) {
1070 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1071 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1072 RecordPcInfo(nullptr, 0);
1073 }
1074
1075 if (HasEmptyFrame()) {
1076 return;
1077 }
1078
1079 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1080 Register reg = kCoreCalleeSaves[i];
1081 if (allocated_registers_.ContainsCoreRegister(reg)) {
1082 __ pushl(reg);
1083 __ cfi().AdjustCFAOffset(kX86WordSize);
1084 __ cfi().RelOffset(DWARFReg(reg), 0);
1085 }
1086 }
1087
1088 int adjust = GetFrameSize() - FrameEntrySpillSize();
1089 __ subl(ESP, Immediate(adjust));
1090 __ cfi().AdjustCFAOffset(adjust);
1091 // Save the current method if we need it. Note that we do not
1092 // do this in HCurrentMethod, as the instruction might have been removed
1093 // in the SSA graph.
1094 if (RequiresCurrentMethod()) {
1095 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1096 }
1097
1098 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1099 // Initialize should_deoptimize flag to 0.
1100 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1101 }
1102 }
1103
GenerateFrameExit()1104 void CodeGeneratorX86::GenerateFrameExit() {
1105 __ cfi().RememberState();
1106 if (!HasEmptyFrame()) {
1107 int adjust = GetFrameSize() - FrameEntrySpillSize();
1108 __ addl(ESP, Immediate(adjust));
1109 __ cfi().AdjustCFAOffset(-adjust);
1110
1111 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1112 Register reg = kCoreCalleeSaves[i];
1113 if (allocated_registers_.ContainsCoreRegister(reg)) {
1114 __ popl(reg);
1115 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1116 __ cfi().Restore(DWARFReg(reg));
1117 }
1118 }
1119 }
1120 __ ret();
1121 __ cfi().RestoreState();
1122 __ cfi().DefCFAOffset(GetFrameSize());
1123 }
1124
Bind(HBasicBlock * block)1125 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1126 __ Bind(GetLabelOf(block));
1127 }
1128
GetReturnLocation(DataType::Type type) const1129 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1130 switch (type) {
1131 case DataType::Type::kReference:
1132 case DataType::Type::kBool:
1133 case DataType::Type::kUint8:
1134 case DataType::Type::kInt8:
1135 case DataType::Type::kUint16:
1136 case DataType::Type::kInt16:
1137 case DataType::Type::kUint32:
1138 case DataType::Type::kInt32:
1139 return Location::RegisterLocation(EAX);
1140
1141 case DataType::Type::kUint64:
1142 case DataType::Type::kInt64:
1143 return Location::RegisterPairLocation(EAX, EDX);
1144
1145 case DataType::Type::kVoid:
1146 return Location::NoLocation();
1147
1148 case DataType::Type::kFloat64:
1149 case DataType::Type::kFloat32:
1150 return Location::FpuRegisterLocation(XMM0);
1151 }
1152
1153 UNREACHABLE();
1154 }
1155
// Returns the location of the method argument: the register
// kMethodRegisterArgument.
Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
  return Location::RegisterLocation(kMethodRegisterArgument);
}
1159
GetNextLocation(DataType::Type type)1160 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1161 switch (type) {
1162 case DataType::Type::kReference:
1163 case DataType::Type::kBool:
1164 case DataType::Type::kUint8:
1165 case DataType::Type::kInt8:
1166 case DataType::Type::kUint16:
1167 case DataType::Type::kInt16:
1168 case DataType::Type::kInt32: {
1169 uint32_t index = gp_index_++;
1170 stack_index_++;
1171 if (index < calling_convention.GetNumberOfRegisters()) {
1172 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1173 } else {
1174 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1175 }
1176 }
1177
1178 case DataType::Type::kInt64: {
1179 uint32_t index = gp_index_;
1180 gp_index_ += 2;
1181 stack_index_ += 2;
1182 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1183 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1184 calling_convention.GetRegisterPairAt(index));
1185 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1186 } else {
1187 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1188 }
1189 }
1190
1191 case DataType::Type::kFloat32: {
1192 uint32_t index = float_index_++;
1193 stack_index_++;
1194 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1195 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1196 } else {
1197 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1198 }
1199 }
1200
1201 case DataType::Type::kFloat64: {
1202 uint32_t index = float_index_++;
1203 stack_index_ += 2;
1204 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1205 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1206 } else {
1207 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1208 }
1209 }
1210
1211 case DataType::Type::kUint32:
1212 case DataType::Type::kUint64:
1213 case DataType::Type::kVoid:
1214 LOG(FATAL) << "Unexpected parameter type " << type;
1215 break;
1216 }
1217 return Location::NoLocation();
1218 }
1219
Move32(Location destination,Location source)1220 void CodeGeneratorX86::Move32(Location destination, Location source) {
1221 if (source.Equals(destination)) {
1222 return;
1223 }
1224 if (destination.IsRegister()) {
1225 if (source.IsRegister()) {
1226 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1227 } else if (source.IsFpuRegister()) {
1228 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1229 } else {
1230 DCHECK(source.IsStackSlot());
1231 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1232 }
1233 } else if (destination.IsFpuRegister()) {
1234 if (source.IsRegister()) {
1235 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1236 } else if (source.IsFpuRegister()) {
1237 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1238 } else {
1239 DCHECK(source.IsStackSlot());
1240 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1241 }
1242 } else {
1243 DCHECK(destination.IsStackSlot()) << destination;
1244 if (source.IsRegister()) {
1245 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1246 } else if (source.IsFpuRegister()) {
1247 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1248 } else if (source.IsConstant()) {
1249 HConstant* constant = source.GetConstant();
1250 int32_t value = GetInt32ValueOf(constant);
1251 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1252 } else {
1253 DCHECK(source.IsStackSlot());
1254 __ pushl(Address(ESP, source.GetStackIndex()));
1255 __ popl(Address(ESP, destination.GetStackIndex()));
1256 }
1257 }
1258 }
1259
Move64(Location destination,Location source)1260 void CodeGeneratorX86::Move64(Location destination, Location source) {
1261 if (source.Equals(destination)) {
1262 return;
1263 }
1264 if (destination.IsRegisterPair()) {
1265 if (source.IsRegisterPair()) {
1266 EmitParallelMoves(
1267 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1268 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1269 DataType::Type::kInt32,
1270 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1271 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1272 DataType::Type::kInt32);
1273 } else if (source.IsFpuRegister()) {
1274 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1275 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1276 __ psrlq(src_reg, Immediate(32));
1277 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1278 } else {
1279 // No conflict possible, so just do the moves.
1280 DCHECK(source.IsDoubleStackSlot());
1281 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1282 __ movl(destination.AsRegisterPairHigh<Register>(),
1283 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1284 }
1285 } else if (destination.IsFpuRegister()) {
1286 if (source.IsFpuRegister()) {
1287 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1288 } else if (source.IsDoubleStackSlot()) {
1289 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1290 } else if (source.IsRegisterPair()) {
1291 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1292 // Create stack space for 2 elements.
1293 __ subl(ESP, Immediate(2 * elem_size));
1294 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
1295 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
1296 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1297 // And remove the temporary stack space we allocated.
1298 __ addl(ESP, Immediate(2 * elem_size));
1299 } else {
1300 LOG(FATAL) << "Unimplemented";
1301 }
1302 } else {
1303 DCHECK(destination.IsDoubleStackSlot()) << destination;
1304 if (source.IsRegisterPair()) {
1305 // No conflict possible, so just do the moves.
1306 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1307 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1308 source.AsRegisterPairHigh<Register>());
1309 } else if (source.IsFpuRegister()) {
1310 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1311 } else if (source.IsConstant()) {
1312 HConstant* constant = source.GetConstant();
1313 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1314 int64_t value = GetInt64ValueOf(constant);
1315 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1316 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1317 Immediate(High32Bits(value)));
1318 } else {
1319 DCHECK(source.IsDoubleStackSlot()) << source;
1320 EmitParallelMoves(
1321 Location::StackSlot(source.GetStackIndex()),
1322 Location::StackSlot(destination.GetStackIndex()),
1323 DataType::Type::kInt32,
1324 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1325 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1326 DataType::Type::kInt32);
1327 }
1328 }
1329 }
1330
MoveConstant(Location location,int32_t value)1331 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1332 DCHECK(location.IsRegister());
1333 __ movl(location.AsRegister<Register>(), Immediate(value));
1334 }
1335
MoveLocation(Location dst,Location src,DataType::Type dst_type)1336 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1337 HParallelMove move(GetGraph()->GetAllocator());
1338 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1339 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1340 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1341 } else {
1342 move.AddMove(src, dst, dst_type, nullptr);
1343 }
1344 GetMoveResolver()->EmitNativeCode(&move);
1345 }
1346
AddLocationAsTemp(Location location,LocationSummary * locations)1347 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1348 if (location.IsRegister()) {
1349 locations->AddTemp(location);
1350 } else if (location.IsRegisterPair()) {
1351 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1352 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1353 } else {
1354 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1355 }
1356 }
1357
HandleGoto(HInstruction * got,HBasicBlock * successor)1358 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1359 if (successor->IsExitBlock()) {
1360 DCHECK(got->GetPrevious()->AlwaysThrows());
1361 return; // no code needed
1362 }
1363
1364 HBasicBlock* block = got->GetBlock();
1365 HInstruction* previous = got->GetPrevious();
1366
1367 HLoopInformation* info = block->GetLoopInformation();
1368 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1369 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
1370 __ pushl(EAX);
1371 __ movl(EAX, Address(ESP, kX86WordSize));
1372 __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1));
1373 __ popl(EAX);
1374 }
1375 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1376 return;
1377 }
1378
1379 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1380 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1381 }
1382 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1383 __ jmp(codegen_->GetLabelOf(successor));
1384 }
1385 }
1386
// HGoto gets no location summary.
void LocationsBuilderX86::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}
1390
// Generates the jump to the single successor of `got` via HandleGoto.
void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}
1394
// HTryBoundary gets no location summary.
void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}
1398
VisitTryBoundary(HTryBoundary * try_boundary)1399 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1400 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1401 if (!successor->IsExitBlock()) {
1402 HandleGoto(try_boundary, successor);
1403 }
1404 }
1405
// HExit gets no location summary.
void LocationsBuilderX86::VisitExit(HExit* exit) {
  exit->SetLocations(nullptr);
}
1409
// No code is emitted for HExit.
void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
1412
1413 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1414 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1415 LabelType* true_label,
1416 LabelType* false_label) {
1417 if (cond->IsFPConditionTrueIfNaN()) {
1418 __ j(kUnordered, true_label);
1419 } else if (cond->IsFPConditionFalseIfNaN()) {
1420 __ j(kUnordered, false_label);
1421 }
1422 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1423 }
1424
1425 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1426 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1427 LabelType* true_label,
1428 LabelType* false_label) {
1429 LocationSummary* locations = cond->GetLocations();
1430 Location left = locations->InAt(0);
1431 Location right = locations->InAt(1);
1432 IfCondition if_cond = cond->GetCondition();
1433
1434 Register left_high = left.AsRegisterPairHigh<Register>();
1435 Register left_low = left.AsRegisterPairLow<Register>();
1436 IfCondition true_high_cond = if_cond;
1437 IfCondition false_high_cond = cond->GetOppositeCondition();
1438 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1439
1440 // Set the conditions for the test, remembering that == needs to be
1441 // decided using the low words.
1442 switch (if_cond) {
1443 case kCondEQ:
1444 case kCondNE:
1445 // Nothing to do.
1446 break;
1447 case kCondLT:
1448 false_high_cond = kCondGT;
1449 break;
1450 case kCondLE:
1451 true_high_cond = kCondLT;
1452 break;
1453 case kCondGT:
1454 false_high_cond = kCondLT;
1455 break;
1456 case kCondGE:
1457 true_high_cond = kCondGT;
1458 break;
1459 case kCondB:
1460 false_high_cond = kCondA;
1461 break;
1462 case kCondBE:
1463 true_high_cond = kCondB;
1464 break;
1465 case kCondA:
1466 false_high_cond = kCondB;
1467 break;
1468 case kCondAE:
1469 true_high_cond = kCondA;
1470 break;
1471 }
1472
1473 if (right.IsConstant()) {
1474 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1475 int32_t val_high = High32Bits(value);
1476 int32_t val_low = Low32Bits(value);
1477
1478 codegen_->Compare32BitValue(left_high, val_high);
1479 if (if_cond == kCondNE) {
1480 __ j(X86Condition(true_high_cond), true_label);
1481 } else if (if_cond == kCondEQ) {
1482 __ j(X86Condition(false_high_cond), false_label);
1483 } else {
1484 __ j(X86Condition(true_high_cond), true_label);
1485 __ j(X86Condition(false_high_cond), false_label);
1486 }
1487 // Must be equal high, so compare the lows.
1488 codegen_->Compare32BitValue(left_low, val_low);
1489 } else if (right.IsRegisterPair()) {
1490 Register right_high = right.AsRegisterPairHigh<Register>();
1491 Register right_low = right.AsRegisterPairLow<Register>();
1492
1493 __ cmpl(left_high, right_high);
1494 if (if_cond == kCondNE) {
1495 __ j(X86Condition(true_high_cond), true_label);
1496 } else if (if_cond == kCondEQ) {
1497 __ j(X86Condition(false_high_cond), false_label);
1498 } else {
1499 __ j(X86Condition(true_high_cond), true_label);
1500 __ j(X86Condition(false_high_cond), false_label);
1501 }
1502 // Must be equal high, so compare the lows.
1503 __ cmpl(left_low, right_low);
1504 } else {
1505 DCHECK(right.IsDoubleStackSlot());
1506 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1507 if (if_cond == kCondNE) {
1508 __ j(X86Condition(true_high_cond), true_label);
1509 } else if (if_cond == kCondEQ) {
1510 __ j(X86Condition(false_high_cond), false_label);
1511 } else {
1512 __ j(X86Condition(true_high_cond), true_label);
1513 __ j(X86Condition(false_high_cond), false_label);
1514 }
1515 // Must be equal high, so compare the lows.
1516 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1517 }
1518 // The last comparison might be unsigned.
1519 __ j(final_condition, true_label);
1520 }
1521
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1522 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1523 Location rhs,
1524 HInstruction* insn,
1525 bool is_double) {
1526 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1527 if (is_double) {
1528 if (rhs.IsFpuRegister()) {
1529 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1530 } else if (const_area != nullptr) {
1531 DCHECK(const_area->IsEmittedAtUseSite());
1532 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1533 codegen_->LiteralDoubleAddress(
1534 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1535 const_area->GetBaseMethodAddress(),
1536 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1537 } else {
1538 DCHECK(rhs.IsDoubleStackSlot());
1539 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1540 }
1541 } else {
1542 if (rhs.IsFpuRegister()) {
1543 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1544 } else if (const_area != nullptr) {
1545 DCHECK(const_area->IsEmittedAtUseSite());
1546 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1547 codegen_->LiteralFloatAddress(
1548 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1549 const_area->GetBaseMethodAddress(),
1550 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1551 } else {
1552 DCHECK(rhs.IsStackSlot());
1553 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1554 }
1555 }
1556 }
1557
1558 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1559 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1560 LabelType* true_target_in,
1561 LabelType* false_target_in) {
1562 // Generated branching requires both targets to be explicit. If either of the
1563 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1564 LabelType fallthrough_target;
1565 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1566 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1567
1568 LocationSummary* locations = condition->GetLocations();
1569 Location left = locations->InAt(0);
1570 Location right = locations->InAt(1);
1571
1572 DataType::Type type = condition->InputAt(0)->GetType();
1573 switch (type) {
1574 case DataType::Type::kInt64:
1575 GenerateLongComparesAndJumps(condition, true_target, false_target);
1576 break;
1577 case DataType::Type::kFloat32:
1578 GenerateFPCompare(left, right, condition, false);
1579 GenerateFPJumps(condition, true_target, false_target);
1580 break;
1581 case DataType::Type::kFloat64:
1582 GenerateFPCompare(left, right, condition, true);
1583 GenerateFPJumps(condition, true_target, false_target);
1584 break;
1585 default:
1586 LOG(FATAL) << "Unexpected compare type " << type;
1587 }
1588
1589 if (false_target != &fallthrough_target) {
1590 __ jmp(false_target);
1591 }
1592
1593 if (fallthrough_target.IsLinked()) {
1594 __ Bind(&fallthrough_target);
1595 }
1596 }
1597
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1598 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1599 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1600 // are set only strictly before `branch`. We can't use the eflags on long/FP
1601 // conditions if they are materialized due to the complex branching.
1602 return cond->IsCondition() &&
1603 cond->GetNext() == branch &&
1604 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1605 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1606 }
1607
1608 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1609 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1610 size_t condition_input_index,
1611 LabelType* true_target,
1612 LabelType* false_target) {
1613 HInstruction* cond = instruction->InputAt(condition_input_index);
1614
1615 if (true_target == nullptr && false_target == nullptr) {
1616 // Nothing to do. The code always falls through.
1617 return;
1618 } else if (cond->IsIntConstant()) {
1619 // Constant condition, statically compared against "true" (integer value 1).
1620 if (cond->AsIntConstant()->IsTrue()) {
1621 if (true_target != nullptr) {
1622 __ jmp(true_target);
1623 }
1624 } else {
1625 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1626 if (false_target != nullptr) {
1627 __ jmp(false_target);
1628 }
1629 }
1630 return;
1631 }
1632
1633 // The following code generates these patterns:
1634 // (1) true_target == nullptr && false_target != nullptr
1635 // - opposite condition true => branch to false_target
1636 // (2) true_target != nullptr && false_target == nullptr
1637 // - condition true => branch to true_target
1638 // (3) true_target != nullptr && false_target != nullptr
1639 // - condition true => branch to true_target
1640 // - branch to false_target
1641 if (IsBooleanValueOrMaterializedCondition(cond)) {
1642 if (AreEflagsSetFrom(cond, instruction)) {
1643 if (true_target == nullptr) {
1644 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1645 } else {
1646 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1647 }
1648 } else {
1649 // Materialized condition, compare against 0.
1650 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1651 if (lhs.IsRegister()) {
1652 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1653 } else {
1654 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1655 }
1656 if (true_target == nullptr) {
1657 __ j(kEqual, false_target);
1658 } else {
1659 __ j(kNotEqual, true_target);
1660 }
1661 }
1662 } else {
1663 // Condition has not been materialized, use its inputs as the comparison and
1664 // its condition as the branch condition.
1665 HCondition* condition = cond->AsCondition();
1666
1667 // If this is a long or FP comparison that has been folded into
1668 // the HCondition, generate the comparison directly.
1669 DataType::Type type = condition->InputAt(0)->GetType();
1670 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1671 GenerateCompareTestAndBranch(condition, true_target, false_target);
1672 return;
1673 }
1674
1675 Location lhs = condition->GetLocations()->InAt(0);
1676 Location rhs = condition->GetLocations()->InAt(1);
1677 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1678 codegen_->GenerateIntCompare(lhs, rhs);
1679 if (true_target == nullptr) {
1680 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1681 } else {
1682 __ j(X86Condition(condition->GetCondition()), true_target);
1683 }
1684 }
1685
1686 // If neither branch falls through (case 3), the conditional branch to `true_target`
1687 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1688 if (true_target != nullptr && false_target != nullptr) {
1689 __ jmp(false_target);
1690 }
1691 }
1692
VisitIf(HIf * if_instr)1693 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1694 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1695 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1696 locations->SetInAt(0, Location::Any());
1697 }
1698 }
1699
VisitIf(HIf * if_instr)1700 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1701 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1702 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1703 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1704 nullptr : codegen_->GetLabelOf(true_successor);
1705 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1706 nullptr : codegen_->GetLabelOf(false_successor);
1707 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1708 }
1709
VisitDeoptimize(HDeoptimize * deoptimize)1710 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1711 LocationSummary* locations = new (GetGraph()->GetAllocator())
1712 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1713 InvokeRuntimeCallingConvention calling_convention;
1714 RegisterSet caller_saves = RegisterSet::Empty();
1715 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1716 locations->SetCustomSlowPathCallerSaves(caller_saves);
1717 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1718 locations->SetInAt(0, Location::Any());
1719 }
1720 }
1721
VisitDeoptimize(HDeoptimize * deoptimize)1722 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1723 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1724 GenerateTestAndBranch<Label>(deoptimize,
1725 /* condition_input_index */ 0,
1726 slow_path->GetEntryLabel(),
1727 /* false_target */ nullptr);
1728 }
1729
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1730 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1731 LocationSummary* locations = new (GetGraph()->GetAllocator())
1732 LocationSummary(flag, LocationSummary::kNoCall);
1733 locations->SetOut(Location::RequiresRegister());
1734 }
1735
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1736 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1737 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1738 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1739 }
1740
SelectCanUseCMOV(HSelect * select)1741 static bool SelectCanUseCMOV(HSelect* select) {
1742 // There are no conditional move instructions for XMMs.
1743 if (DataType::IsFloatingPointType(select->GetType())) {
1744 return false;
1745 }
1746
1747 // A FP condition doesn't generate the single CC that we need.
1748 // In 32 bit mode, a long condition doesn't generate a single CC either.
1749 HInstruction* condition = select->GetCondition();
1750 if (condition->IsCondition()) {
1751 DataType::Type compare_type = condition->InputAt(0)->GetType();
1752 if (compare_type == DataType::Type::kInt64 ||
1753 DataType::IsFloatingPointType(compare_type)) {
1754 return false;
1755 }
1756 }
1757
1758 // We can generate a CMOV for this Select.
1759 return true;
1760 }
1761
VisitSelect(HSelect * select)1762 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1763 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1764 if (DataType::IsFloatingPointType(select->GetType())) {
1765 locations->SetInAt(0, Location::RequiresFpuRegister());
1766 locations->SetInAt(1, Location::Any());
1767 } else {
1768 locations->SetInAt(0, Location::RequiresRegister());
1769 if (SelectCanUseCMOV(select)) {
1770 if (select->InputAt(1)->IsConstant()) {
1771 // Cmov can't handle a constant value.
1772 locations->SetInAt(1, Location::RequiresRegister());
1773 } else {
1774 locations->SetInAt(1, Location::Any());
1775 }
1776 } else {
1777 locations->SetInAt(1, Location::Any());
1778 }
1779 }
1780 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1781 locations->SetInAt(2, Location::RequiresRegister());
1782 }
1783 locations->SetOut(Location::SameAsFirstInput());
1784 }
1785
VisitSelect(HSelect * select)1786 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1787 LocationSummary* locations = select->GetLocations();
1788 DCHECK(locations->InAt(0).Equals(locations->Out()));
1789 if (SelectCanUseCMOV(select)) {
1790 // If both the condition and the source types are integer, we can generate
1791 // a CMOV to implement Select.
1792
1793 HInstruction* select_condition = select->GetCondition();
1794 Condition cond = kNotEqual;
1795
1796 // Figure out how to test the 'condition'.
1797 if (select_condition->IsCondition()) {
1798 HCondition* condition = select_condition->AsCondition();
1799 if (!condition->IsEmittedAtUseSite()) {
1800 // This was a previously materialized condition.
1801 // Can we use the existing condition code?
1802 if (AreEflagsSetFrom(condition, select)) {
1803 // Materialization was the previous instruction. Condition codes are right.
1804 cond = X86Condition(condition->GetCondition());
1805 } else {
1806 // No, we have to recreate the condition code.
1807 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1808 __ testl(cond_reg, cond_reg);
1809 }
1810 } else {
1811 // We can't handle FP or long here.
1812 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1813 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1814 LocationSummary* cond_locations = condition->GetLocations();
1815 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1816 cond = X86Condition(condition->GetCondition());
1817 }
1818 } else {
1819 // Must be a Boolean condition, which needs to be compared to 0.
1820 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1821 __ testl(cond_reg, cond_reg);
1822 }
1823
1824 // If the condition is true, overwrite the output, which already contains false.
1825 Location false_loc = locations->InAt(0);
1826 Location true_loc = locations->InAt(1);
1827 if (select->GetType() == DataType::Type::kInt64) {
1828 // 64 bit conditional move.
1829 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1830 Register false_low = false_loc.AsRegisterPairLow<Register>();
1831 if (true_loc.IsRegisterPair()) {
1832 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1833 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1834 } else {
1835 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1836 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1837 }
1838 } else {
1839 // 32 bit conditional move.
1840 Register false_reg = false_loc.AsRegister<Register>();
1841 if (true_loc.IsRegister()) {
1842 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1843 } else {
1844 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1845 }
1846 }
1847 } else {
1848 NearLabel false_target;
1849 GenerateTestAndBranch<NearLabel>(
1850 select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
1851 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1852 __ Bind(&false_target);
1853 }
1854 }
1855
VisitNativeDebugInfo(HNativeDebugInfo * info)1856 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1857 new (GetGraph()->GetAllocator()) LocationSummary(info);
1858 }
1859
VisitNativeDebugInfo(HNativeDebugInfo *)1860 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1861 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1862 }
1863
GenerateNop()1864 void CodeGeneratorX86::GenerateNop() {
1865 __ nop();
1866 }
1867
HandleCondition(HCondition * cond)1868 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1869 LocationSummary* locations =
1870 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1871 // Handle the long/FP comparisons made in instruction simplification.
1872 switch (cond->InputAt(0)->GetType()) {
1873 case DataType::Type::kInt64: {
1874 locations->SetInAt(0, Location::RequiresRegister());
1875 locations->SetInAt(1, Location::Any());
1876 if (!cond->IsEmittedAtUseSite()) {
1877 locations->SetOut(Location::RequiresRegister());
1878 }
1879 break;
1880 }
1881 case DataType::Type::kFloat32:
1882 case DataType::Type::kFloat64: {
1883 locations->SetInAt(0, Location::RequiresFpuRegister());
1884 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1885 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1886 } else if (cond->InputAt(1)->IsConstant()) {
1887 locations->SetInAt(1, Location::RequiresFpuRegister());
1888 } else {
1889 locations->SetInAt(1, Location::Any());
1890 }
1891 if (!cond->IsEmittedAtUseSite()) {
1892 locations->SetOut(Location::RequiresRegister());
1893 }
1894 break;
1895 }
1896 default:
1897 locations->SetInAt(0, Location::RequiresRegister());
1898 locations->SetInAt(1, Location::Any());
1899 if (!cond->IsEmittedAtUseSite()) {
1900 // We need a byte register.
1901 locations->SetOut(Location::RegisterLocation(ECX));
1902 }
1903 break;
1904 }
1905 }
1906
HandleCondition(HCondition * cond)1907 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1908 if (cond->IsEmittedAtUseSite()) {
1909 return;
1910 }
1911
1912 LocationSummary* locations = cond->GetLocations();
1913 Location lhs = locations->InAt(0);
1914 Location rhs = locations->InAt(1);
1915 Register reg = locations->Out().AsRegister<Register>();
1916 NearLabel true_label, false_label;
1917
1918 switch (cond->InputAt(0)->GetType()) {
1919 default: {
1920 // Integer case.
1921
1922 // Clear output register: setb only sets the low byte.
1923 __ xorl(reg, reg);
1924 codegen_->GenerateIntCompare(lhs, rhs);
1925 __ setb(X86Condition(cond->GetCondition()), reg);
1926 return;
1927 }
1928 case DataType::Type::kInt64:
1929 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
1930 break;
1931 case DataType::Type::kFloat32:
1932 GenerateFPCompare(lhs, rhs, cond, false);
1933 GenerateFPJumps(cond, &true_label, &false_label);
1934 break;
1935 case DataType::Type::kFloat64:
1936 GenerateFPCompare(lhs, rhs, cond, true);
1937 GenerateFPJumps(cond, &true_label, &false_label);
1938 break;
1939 }
1940
1941 // Convert the jumps into the result.
1942 NearLabel done_label;
1943
1944 // False case: result = 0.
1945 __ Bind(&false_label);
1946 __ xorl(reg, reg);
1947 __ jmp(&done_label);
1948
1949 // True case: result = 1.
1950 __ Bind(&true_label);
1951 __ movl(reg, Immediate(1));
1952 __ Bind(&done_label);
1953 }
1954
VisitEqual(HEqual * comp)1955 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
1956 HandleCondition(comp);
1957 }
1958
VisitEqual(HEqual * comp)1959 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
1960 HandleCondition(comp);
1961 }
1962
VisitNotEqual(HNotEqual * comp)1963 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
1964 HandleCondition(comp);
1965 }
1966
VisitNotEqual(HNotEqual * comp)1967 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
1968 HandleCondition(comp);
1969 }
1970
VisitLessThan(HLessThan * comp)1971 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
1972 HandleCondition(comp);
1973 }
1974
VisitLessThan(HLessThan * comp)1975 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
1976 HandleCondition(comp);
1977 }
1978
VisitLessThanOrEqual(HLessThanOrEqual * comp)1979 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1980 HandleCondition(comp);
1981 }
1982
VisitLessThanOrEqual(HLessThanOrEqual * comp)1983 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1984 HandleCondition(comp);
1985 }
1986
VisitGreaterThan(HGreaterThan * comp)1987 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
1988 HandleCondition(comp);
1989 }
1990
VisitGreaterThan(HGreaterThan * comp)1991 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
1992 HandleCondition(comp);
1993 }
1994
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)1995 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1996 HandleCondition(comp);
1997 }
1998
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)1999 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2000 HandleCondition(comp);
2001 }
2002
VisitBelow(HBelow * comp)2003 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2004 HandleCondition(comp);
2005 }
2006
VisitBelow(HBelow * comp)2007 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2008 HandleCondition(comp);
2009 }
2010
VisitBelowOrEqual(HBelowOrEqual * comp)2011 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2012 HandleCondition(comp);
2013 }
2014
VisitBelowOrEqual(HBelowOrEqual * comp)2015 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2016 HandleCondition(comp);
2017 }
2018
VisitAbove(HAbove * comp)2019 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2020 HandleCondition(comp);
2021 }
2022
VisitAbove(HAbove * comp)2023 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2024 HandleCondition(comp);
2025 }
2026
VisitAboveOrEqual(HAboveOrEqual * comp)2027 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2028 HandleCondition(comp);
2029 }
2030
VisitAboveOrEqual(HAboveOrEqual * comp)2031 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2032 HandleCondition(comp);
2033 }
2034
VisitIntConstant(HIntConstant * constant)2035 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2036 LocationSummary* locations =
2037 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2038 locations->SetOut(Location::ConstantLocation(constant));
2039 }
2040
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2041 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2042 // Will be generated at use site.
2043 }
2044
VisitNullConstant(HNullConstant * constant)2045 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2046 LocationSummary* locations =
2047 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2048 locations->SetOut(Location::ConstantLocation(constant));
2049 }
2050
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2051 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2052 // Will be generated at use site.
2053 }
2054
VisitLongConstant(HLongConstant * constant)2055 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2056 LocationSummary* locations =
2057 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2058 locations->SetOut(Location::ConstantLocation(constant));
2059 }
2060
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2061 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2062 // Will be generated at use site.
2063 }
2064
VisitFloatConstant(HFloatConstant * constant)2065 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2066 LocationSummary* locations =
2067 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2068 locations->SetOut(Location::ConstantLocation(constant));
2069 }
2070
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2071 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2072 // Will be generated at use site.
2073 }
2074
VisitDoubleConstant(HDoubleConstant * constant)2075 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2076 LocationSummary* locations =
2077 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2078 locations->SetOut(Location::ConstantLocation(constant));
2079 }
2080
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2081 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2082 // Will be generated at use site.
2083 }
2084
VisitConstructorFence(HConstructorFence * constructor_fence)2085 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2086 constructor_fence->SetLocations(nullptr);
2087 }
2088
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2089 void InstructionCodeGeneratorX86::VisitConstructorFence(
2090 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2091 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2092 }
2093
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2094 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2095 memory_barrier->SetLocations(nullptr);
2096 }
2097
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2098 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2099 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2100 }
2101
VisitReturnVoid(HReturnVoid * ret)2102 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2103 ret->SetLocations(nullptr);
2104 }
2105
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2106 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2107 codegen_->GenerateFrameExit();
2108 }
2109
VisitReturn(HReturn * ret)2110 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2111 LocationSummary* locations =
2112 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2113 switch (ret->InputAt(0)->GetType()) {
2114 case DataType::Type::kReference:
2115 case DataType::Type::kBool:
2116 case DataType::Type::kUint8:
2117 case DataType::Type::kInt8:
2118 case DataType::Type::kUint16:
2119 case DataType::Type::kInt16:
2120 case DataType::Type::kInt32:
2121 locations->SetInAt(0, Location::RegisterLocation(EAX));
2122 break;
2123
2124 case DataType::Type::kInt64:
2125 locations->SetInAt(
2126 0, Location::RegisterPairLocation(EAX, EDX));
2127 break;
2128
2129 case DataType::Type::kFloat32:
2130 case DataType::Type::kFloat64:
2131 locations->SetInAt(
2132 0, Location::FpuRegisterLocation(XMM0));
2133 break;
2134
2135 default:
2136 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2137 }
2138 }
2139
VisitReturn(HReturn * ret)2140 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2141 if (kIsDebugBuild) {
2142 switch (ret->InputAt(0)->GetType()) {
2143 case DataType::Type::kReference:
2144 case DataType::Type::kBool:
2145 case DataType::Type::kUint8:
2146 case DataType::Type::kInt8:
2147 case DataType::Type::kUint16:
2148 case DataType::Type::kInt16:
2149 case DataType::Type::kInt32:
2150 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2151 break;
2152
2153 case DataType::Type::kInt64:
2154 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2155 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2156 break;
2157
2158 case DataType::Type::kFloat32:
2159 case DataType::Type::kFloat64:
2160 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2161 break;
2162
2163 default:
2164 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2165 }
2166 }
2167 codegen_->GenerateFrameExit();
2168 }
2169
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2170 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2171 // The trampoline uses the same calling convention as dex calling conventions,
2172 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2173 // the method_idx.
2174 HandleInvoke(invoke);
2175 }
2176
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2177 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2178 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2179 }
2180
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2181 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2182 // Explicit clinit checks triggered by static invokes must have been pruned by
2183 // art::PrepareForRegisterAllocation.
2184 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2185
2186 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2187 if (intrinsic.TryDispatch(invoke)) {
2188 if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
2189 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2190 }
2191 return;
2192 }
2193
2194 HandleInvoke(invoke);
2195
2196 // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
2197 if (invoke->HasPcRelativeMethodLoadKind()) {
2198 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2199 }
2200 }
2201
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2202 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2203 if (invoke->GetLocations()->Intrinsified()) {
2204 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2205 intrinsic.Dispatch(invoke);
2206 return true;
2207 }
2208 return false;
2209 }
2210
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2211 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2212 // Explicit clinit checks triggered by static invokes must have been pruned by
2213 // art::PrepareForRegisterAllocation.
2214 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2215
2216 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2217 return;
2218 }
2219
2220 LocationSummary* locations = invoke->GetLocations();
2221 codegen_->GenerateStaticOrDirectCall(
2222 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2223 }
2224
VisitInvokeVirtual(HInvokeVirtual * invoke)2225 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2226 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2227 if (intrinsic.TryDispatch(invoke)) {
2228 return;
2229 }
2230
2231 HandleInvoke(invoke);
2232 }
2233
HandleInvoke(HInvoke * invoke)2234 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2235 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2236 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2237 }
2238
VisitInvokeVirtual(HInvokeVirtual * invoke)2239 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2240 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2241 return;
2242 }
2243
2244 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2245 DCHECK(!codegen_->IsLeafMethod());
2246 }
2247
VisitInvokeInterface(HInvokeInterface * invoke)2248 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2249 // This call to HandleInvoke allocates a temporary (core) register
2250 // which is also used to transfer the hidden argument from FP to
2251 // core register.
2252 HandleInvoke(invoke);
2253 // Add the hidden argument.
2254 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2255 }
2256
VisitInvokeInterface(HInvokeInterface * invoke)2257 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2258 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2259 LocationSummary* locations = invoke->GetLocations();
2260 Register temp = locations->GetTemp(0).AsRegister<Register>();
2261 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2262 Location receiver = locations->InAt(0);
2263 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2264
2265 // Set the hidden argument. This is safe to do this here, as XMM7
2266 // won't be modified thereafter, before the `call` instruction.
2267 DCHECK_EQ(XMM7, hidden_reg);
2268 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2269 __ movd(hidden_reg, temp);
2270
2271 if (receiver.IsStackSlot()) {
2272 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2273 // /* HeapReference<Class> */ temp = temp->klass_
2274 __ movl(temp, Address(temp, class_offset));
2275 } else {
2276 // /* HeapReference<Class> */ temp = receiver->klass_
2277 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2278 }
2279 codegen_->MaybeRecordImplicitNullCheck(invoke);
2280 // Instead of simply (possibly) unpoisoning `temp` here, we should
2281 // emit a read barrier for the previous class reference load.
2282 // However this is not required in practice, as this is an
2283 // intermediate/temporary reference and because the current
2284 // concurrent copying collector keeps the from-space memory
2285 // intact/accessible until the end of the marking phase (the
2286 // concurrent copying collector may not in the future).
2287 __ MaybeUnpoisonHeapReference(temp);
2288 // temp = temp->GetAddressOfIMT()
2289 __ movl(temp,
2290 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2291 // temp = temp->GetImtEntryAt(method_offset);
2292 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2293 invoke->GetImtIndex(), kX86PointerSize));
2294 __ movl(temp, Address(temp, method_offset));
2295 // call temp->GetEntryPoint();
2296 __ call(Address(temp,
2297 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2298
2299 DCHECK(!codegen_->IsLeafMethod());
2300 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2301 }
2302
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2303 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2304 HandleInvoke(invoke);
2305 }
2306
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2307 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2308 codegen_->GenerateInvokePolymorphicCall(invoke);
2309 }
2310
VisitNeg(HNeg * neg)2311 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2312 LocationSummary* locations =
2313 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2314 switch (neg->GetResultType()) {
2315 case DataType::Type::kInt32:
2316 case DataType::Type::kInt64:
2317 locations->SetInAt(0, Location::RequiresRegister());
2318 locations->SetOut(Location::SameAsFirstInput());
2319 break;
2320
2321 case DataType::Type::kFloat32:
2322 locations->SetInAt(0, Location::RequiresFpuRegister());
2323 locations->SetOut(Location::SameAsFirstInput());
2324 locations->AddTemp(Location::RequiresRegister());
2325 locations->AddTemp(Location::RequiresFpuRegister());
2326 break;
2327
2328 case DataType::Type::kFloat64:
2329 locations->SetInAt(0, Location::RequiresFpuRegister());
2330 locations->SetOut(Location::SameAsFirstInput());
2331 locations->AddTemp(Location::RequiresFpuRegister());
2332 break;
2333
2334 default:
2335 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2336 }
2337 }
2338
VisitNeg(HNeg * neg)2339 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2340 LocationSummary* locations = neg->GetLocations();
2341 Location out = locations->Out();
2342 Location in = locations->InAt(0);
2343 switch (neg->GetResultType()) {
2344 case DataType::Type::kInt32:
2345 DCHECK(in.IsRegister());
2346 DCHECK(in.Equals(out));
2347 __ negl(out.AsRegister<Register>());
2348 break;
2349
2350 case DataType::Type::kInt64:
2351 DCHECK(in.IsRegisterPair());
2352 DCHECK(in.Equals(out));
2353 __ negl(out.AsRegisterPairLow<Register>());
2354 // Negation is similar to subtraction from zero. The least
2355 // significant byte triggers a borrow when it is different from
2356 // zero; to take it into account, add 1 to the most significant
2357 // byte if the carry flag (CF) is set to 1 after the first NEGL
2358 // operation.
2359 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2360 __ negl(out.AsRegisterPairHigh<Register>());
2361 break;
2362
2363 case DataType::Type::kFloat32: {
2364 DCHECK(in.Equals(out));
2365 Register constant = locations->GetTemp(0).AsRegister<Register>();
2366 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2367 // Implement float negation with an exclusive or with value
2368 // 0x80000000 (mask for bit 31, representing the sign of a
2369 // single-precision floating-point number).
2370 __ movl(constant, Immediate(INT32_C(0x80000000)));
2371 __ movd(mask, constant);
2372 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2373 break;
2374 }
2375
2376 case DataType::Type::kFloat64: {
2377 DCHECK(in.Equals(out));
2378 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2379 // Implement double negation with an exclusive or with value
2380 // 0x8000000000000000 (mask for bit 63, representing the sign of
2381 // a double-precision floating-point number).
2382 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2383 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2384 break;
2385 }
2386
2387 default:
2388 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2389 }
2390 }
2391
VisitX86FPNeg(HX86FPNeg * neg)2392 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2393 LocationSummary* locations =
2394 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2395 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2396 locations->SetInAt(0, Location::RequiresFpuRegister());
2397 locations->SetInAt(1, Location::RequiresRegister());
2398 locations->SetOut(Location::SameAsFirstInput());
2399 locations->AddTemp(Location::RequiresFpuRegister());
2400 }
2401
VisitX86FPNeg(HX86FPNeg * neg)2402 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2403 LocationSummary* locations = neg->GetLocations();
2404 Location out = locations->Out();
2405 DCHECK(locations->InAt(0).Equals(out));
2406
2407 Register constant_area = locations->InAt(1).AsRegister<Register>();
2408 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2409 if (neg->GetType() == DataType::Type::kFloat32) {
2410 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2411 neg->GetBaseMethodAddress(),
2412 constant_area));
2413 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2414 } else {
2415 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2416 neg->GetBaseMethodAddress(),
2417 constant_area));
2418 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2419 }
2420 }
2421
VisitTypeConversion(HTypeConversion * conversion)2422 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2423 DataType::Type result_type = conversion->GetResultType();
2424 DataType::Type input_type = conversion->GetInputType();
2425 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2426 << input_type << " -> " << result_type;
2427
2428 // The float-to-long and double-to-long type conversions rely on a
2429 // call to the runtime.
2430 LocationSummary::CallKind call_kind =
2431 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2432 && result_type == DataType::Type::kInt64)
2433 ? LocationSummary::kCallOnMainOnly
2434 : LocationSummary::kNoCall;
2435 LocationSummary* locations =
2436 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2437
2438 switch (result_type) {
2439 case DataType::Type::kUint8:
2440 case DataType::Type::kInt8:
2441 switch (input_type) {
2442 case DataType::Type::kUint8:
2443 case DataType::Type::kInt8:
2444 case DataType::Type::kUint16:
2445 case DataType::Type::kInt16:
2446 case DataType::Type::kInt32:
2447 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2448 // Make the output overlap to please the register allocator. This greatly simplifies
2449 // the validation of the linear scan implementation
2450 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2451 break;
2452 case DataType::Type::kInt64: {
2453 HInstruction* input = conversion->InputAt(0);
2454 Location input_location = input->IsConstant()
2455 ? Location::ConstantLocation(input->AsConstant())
2456 : Location::RegisterPairLocation(EAX, EDX);
2457 locations->SetInAt(0, input_location);
2458 // Make the output overlap to please the register allocator. This greatly simplifies
2459 // the validation of the linear scan implementation
2460 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2461 break;
2462 }
2463
2464 default:
2465 LOG(FATAL) << "Unexpected type conversion from " << input_type
2466 << " to " << result_type;
2467 }
2468 break;
2469
2470 case DataType::Type::kUint16:
2471 case DataType::Type::kInt16:
2472 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2473 locations->SetInAt(0, Location::Any());
2474 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2475 break;
2476
2477 case DataType::Type::kInt32:
2478 switch (input_type) {
2479 case DataType::Type::kInt64:
2480 locations->SetInAt(0, Location::Any());
2481 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2482 break;
2483
2484 case DataType::Type::kFloat32:
2485 locations->SetInAt(0, Location::RequiresFpuRegister());
2486 locations->SetOut(Location::RequiresRegister());
2487 locations->AddTemp(Location::RequiresFpuRegister());
2488 break;
2489
2490 case DataType::Type::kFloat64:
2491 locations->SetInAt(0, Location::RequiresFpuRegister());
2492 locations->SetOut(Location::RequiresRegister());
2493 locations->AddTemp(Location::RequiresFpuRegister());
2494 break;
2495
2496 default:
2497 LOG(FATAL) << "Unexpected type conversion from " << input_type
2498 << " to " << result_type;
2499 }
2500 break;
2501
2502 case DataType::Type::kInt64:
2503 switch (input_type) {
2504 case DataType::Type::kBool:
2505 case DataType::Type::kUint8:
2506 case DataType::Type::kInt8:
2507 case DataType::Type::kUint16:
2508 case DataType::Type::kInt16:
2509 case DataType::Type::kInt32:
2510 locations->SetInAt(0, Location::RegisterLocation(EAX));
2511 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2512 break;
2513
2514 case DataType::Type::kFloat32:
2515 case DataType::Type::kFloat64: {
2516 InvokeRuntimeCallingConvention calling_convention;
2517 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2518 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2519
2520 // The runtime helper puts the result in EAX, EDX.
2521 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2522 }
2523 break;
2524
2525 default:
2526 LOG(FATAL) << "Unexpected type conversion from " << input_type
2527 << " to " << result_type;
2528 }
2529 break;
2530
2531 case DataType::Type::kFloat32:
2532 switch (input_type) {
2533 case DataType::Type::kBool:
2534 case DataType::Type::kUint8:
2535 case DataType::Type::kInt8:
2536 case DataType::Type::kUint16:
2537 case DataType::Type::kInt16:
2538 case DataType::Type::kInt32:
2539 locations->SetInAt(0, Location::RequiresRegister());
2540 locations->SetOut(Location::RequiresFpuRegister());
2541 break;
2542
2543 case DataType::Type::kInt64:
2544 locations->SetInAt(0, Location::Any());
2545 locations->SetOut(Location::Any());
2546 break;
2547
2548 case DataType::Type::kFloat64:
2549 locations->SetInAt(0, Location::RequiresFpuRegister());
2550 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2551 break;
2552
2553 default:
2554 LOG(FATAL) << "Unexpected type conversion from " << input_type
2555 << " to " << result_type;
2556 }
2557 break;
2558
2559 case DataType::Type::kFloat64:
2560 switch (input_type) {
2561 case DataType::Type::kBool:
2562 case DataType::Type::kUint8:
2563 case DataType::Type::kInt8:
2564 case DataType::Type::kUint16:
2565 case DataType::Type::kInt16:
2566 case DataType::Type::kInt32:
2567 locations->SetInAt(0, Location::RequiresRegister());
2568 locations->SetOut(Location::RequiresFpuRegister());
2569 break;
2570
2571 case DataType::Type::kInt64:
2572 locations->SetInAt(0, Location::Any());
2573 locations->SetOut(Location::Any());
2574 break;
2575
2576 case DataType::Type::kFloat32:
2577 locations->SetInAt(0, Location::RequiresFpuRegister());
2578 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2579 break;
2580
2581 default:
2582 LOG(FATAL) << "Unexpected type conversion from " << input_type
2583 << " to " << result_type;
2584 }
2585 break;
2586
2587 default:
2588 LOG(FATAL) << "Unexpected type conversion from " << input_type
2589 << " to " << result_type;
2590 }
2591 }
2592
VisitTypeConversion(HTypeConversion * conversion)2593 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2594 LocationSummary* locations = conversion->GetLocations();
2595 Location out = locations->Out();
2596 Location in = locations->InAt(0);
2597 DataType::Type result_type = conversion->GetResultType();
2598 DataType::Type input_type = conversion->GetInputType();
2599 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2600 << input_type << " -> " << result_type;
2601 switch (result_type) {
2602 case DataType::Type::kUint8:
2603 switch (input_type) {
2604 case DataType::Type::kInt8:
2605 case DataType::Type::kUint16:
2606 case DataType::Type::kInt16:
2607 case DataType::Type::kInt32:
2608 if (in.IsRegister()) {
2609 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2610 } else {
2611 DCHECK(in.GetConstant()->IsIntConstant());
2612 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2613 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2614 }
2615 break;
2616 case DataType::Type::kInt64:
2617 if (in.IsRegisterPair()) {
2618 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2619 } else {
2620 DCHECK(in.GetConstant()->IsLongConstant());
2621 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2622 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2623 }
2624 break;
2625
2626 default:
2627 LOG(FATAL) << "Unexpected type conversion from " << input_type
2628 << " to " << result_type;
2629 }
2630 break;
2631
2632 case DataType::Type::kInt8:
2633 switch (input_type) {
2634 case DataType::Type::kUint8:
2635 case DataType::Type::kUint16:
2636 case DataType::Type::kInt16:
2637 case DataType::Type::kInt32:
2638 if (in.IsRegister()) {
2639 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2640 } else {
2641 DCHECK(in.GetConstant()->IsIntConstant());
2642 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2643 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2644 }
2645 break;
2646 case DataType::Type::kInt64:
2647 if (in.IsRegisterPair()) {
2648 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2649 } else {
2650 DCHECK(in.GetConstant()->IsLongConstant());
2651 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2652 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2653 }
2654 break;
2655
2656 default:
2657 LOG(FATAL) << "Unexpected type conversion from " << input_type
2658 << " to " << result_type;
2659 }
2660 break;
2661
2662 case DataType::Type::kUint16:
2663 switch (input_type) {
2664 case DataType::Type::kInt8:
2665 case DataType::Type::kInt16:
2666 case DataType::Type::kInt32:
2667 if (in.IsRegister()) {
2668 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2669 } else if (in.IsStackSlot()) {
2670 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2671 } else {
2672 DCHECK(in.GetConstant()->IsIntConstant());
2673 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2674 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2675 }
2676 break;
2677 case DataType::Type::kInt64:
2678 if (in.IsRegisterPair()) {
2679 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2680 } else if (in.IsDoubleStackSlot()) {
2681 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2682 } else {
2683 DCHECK(in.GetConstant()->IsLongConstant());
2684 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2685 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2686 }
2687 break;
2688
2689 default:
2690 LOG(FATAL) << "Unexpected type conversion from " << input_type
2691 << " to " << result_type;
2692 }
2693 break;
2694
2695 case DataType::Type::kInt16:
2696 switch (input_type) {
2697 case DataType::Type::kUint16:
2698 case DataType::Type::kInt32:
2699 if (in.IsRegister()) {
2700 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2701 } else if (in.IsStackSlot()) {
2702 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2703 } else {
2704 DCHECK(in.GetConstant()->IsIntConstant());
2705 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2706 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2707 }
2708 break;
2709 case DataType::Type::kInt64:
2710 if (in.IsRegisterPair()) {
2711 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2712 } else if (in.IsDoubleStackSlot()) {
2713 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2714 } else {
2715 DCHECK(in.GetConstant()->IsLongConstant());
2716 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2717 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2718 }
2719 break;
2720
2721 default:
2722 LOG(FATAL) << "Unexpected type conversion from " << input_type
2723 << " to " << result_type;
2724 }
2725 break;
2726
2727 case DataType::Type::kInt32:
2728 switch (input_type) {
2729 case DataType::Type::kInt64:
2730 if (in.IsRegisterPair()) {
2731 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2732 } else if (in.IsDoubleStackSlot()) {
2733 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2734 } else {
2735 DCHECK(in.IsConstant());
2736 DCHECK(in.GetConstant()->IsLongConstant());
2737 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2738 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2739 }
2740 break;
2741
2742 case DataType::Type::kFloat32: {
2743 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2744 Register output = out.AsRegister<Register>();
2745 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2746 NearLabel done, nan;
2747
2748 __ movl(output, Immediate(kPrimIntMax));
2749 // temp = int-to-float(output)
2750 __ cvtsi2ss(temp, output);
2751 // if input >= temp goto done
2752 __ comiss(input, temp);
2753 __ j(kAboveEqual, &done);
2754 // if input == NaN goto nan
2755 __ j(kUnordered, &nan);
2756 // output = float-to-int-truncate(input)
2757 __ cvttss2si(output, input);
2758 __ jmp(&done);
2759 __ Bind(&nan);
2760 // output = 0
2761 __ xorl(output, output);
2762 __ Bind(&done);
2763 break;
2764 }
2765
2766 case DataType::Type::kFloat64: {
2767 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2768 Register output = out.AsRegister<Register>();
2769 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2770 NearLabel done, nan;
2771
2772 __ movl(output, Immediate(kPrimIntMax));
2773 // temp = int-to-double(output)
2774 __ cvtsi2sd(temp, output);
2775 // if input >= temp goto done
2776 __ comisd(input, temp);
2777 __ j(kAboveEqual, &done);
2778 // if input == NaN goto nan
2779 __ j(kUnordered, &nan);
2780 // output = double-to-int-truncate(input)
2781 __ cvttsd2si(output, input);
2782 __ jmp(&done);
2783 __ Bind(&nan);
2784 // output = 0
2785 __ xorl(output, output);
2786 __ Bind(&done);
2787 break;
2788 }
2789
2790 default:
2791 LOG(FATAL) << "Unexpected type conversion from " << input_type
2792 << " to " << result_type;
2793 }
2794 break;
2795
2796 case DataType::Type::kInt64:
2797 switch (input_type) {
2798 case DataType::Type::kBool:
2799 case DataType::Type::kUint8:
2800 case DataType::Type::kInt8:
2801 case DataType::Type::kUint16:
2802 case DataType::Type::kInt16:
2803 case DataType::Type::kInt32:
2804 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2805 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2806 DCHECK_EQ(in.AsRegister<Register>(), EAX);
2807 __ cdq();
2808 break;
2809
2810 case DataType::Type::kFloat32:
2811 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2812 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2813 break;
2814
2815 case DataType::Type::kFloat64:
2816 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2817 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2818 break;
2819
2820 default:
2821 LOG(FATAL) << "Unexpected type conversion from " << input_type
2822 << " to " << result_type;
2823 }
2824 break;
2825
2826 case DataType::Type::kFloat32:
2827 switch (input_type) {
2828 case DataType::Type::kBool:
2829 case DataType::Type::kUint8:
2830 case DataType::Type::kInt8:
2831 case DataType::Type::kUint16:
2832 case DataType::Type::kInt16:
2833 case DataType::Type::kInt32:
2834 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2835 break;
2836
2837 case DataType::Type::kInt64: {
2838 size_t adjustment = 0;
2839
2840 // Create stack space for the call to
2841 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2842 // TODO: enhance register allocator to ask for stack temporaries.
2843 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2844 adjustment = DataType::Size(DataType::Type::kInt64);
2845 __ subl(ESP, Immediate(adjustment));
2846 }
2847
2848 // Load the value to the FP stack, using temporaries if needed.
2849 PushOntoFPStack(in, 0, adjustment, false, true);
2850
2851 if (out.IsStackSlot()) {
2852 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2853 } else {
2854 __ fstps(Address(ESP, 0));
2855 Location stack_temp = Location::StackSlot(0);
2856 codegen_->Move32(out, stack_temp);
2857 }
2858
2859 // Remove the temporary stack space we allocated.
2860 if (adjustment != 0) {
2861 __ addl(ESP, Immediate(adjustment));
2862 }
2863 break;
2864 }
2865
2866 case DataType::Type::kFloat64:
2867 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2868 break;
2869
2870 default:
2871 LOG(FATAL) << "Unexpected type conversion from " << input_type
2872 << " to " << result_type;
2873 }
2874 break;
2875
2876 case DataType::Type::kFloat64:
2877 switch (input_type) {
2878 case DataType::Type::kBool:
2879 case DataType::Type::kUint8:
2880 case DataType::Type::kInt8:
2881 case DataType::Type::kUint16:
2882 case DataType::Type::kInt16:
2883 case DataType::Type::kInt32:
2884 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2885 break;
2886
2887 case DataType::Type::kInt64: {
2888 size_t adjustment = 0;
2889
2890 // Create stack space for the call to
2891 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
2892 // TODO: enhance register allocator to ask for stack temporaries.
2893 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
2894 adjustment = DataType::Size(DataType::Type::kInt64);
2895 __ subl(ESP, Immediate(adjustment));
2896 }
2897
2898 // Load the value to the FP stack, using temporaries if needed.
2899 PushOntoFPStack(in, 0, adjustment, false, true);
2900
2901 if (out.IsDoubleStackSlot()) {
2902 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
2903 } else {
2904 __ fstpl(Address(ESP, 0));
2905 Location stack_temp = Location::DoubleStackSlot(0);
2906 codegen_->Move64(out, stack_temp);
2907 }
2908
2909 // Remove the temporary stack space we allocated.
2910 if (adjustment != 0) {
2911 __ addl(ESP, Immediate(adjustment));
2912 }
2913 break;
2914 }
2915
2916 case DataType::Type::kFloat32:
2917 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2918 break;
2919
2920 default:
2921 LOG(FATAL) << "Unexpected type conversion from " << input_type
2922 << " to " << result_type;
2923 }
2924 break;
2925
2926 default:
2927 LOG(FATAL) << "Unexpected type conversion from " << input_type
2928 << " to " << result_type;
2929 }
2930 }
2931
VisitAdd(HAdd * add)2932 void LocationsBuilderX86::VisitAdd(HAdd* add) {
2933 LocationSummary* locations =
2934 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
2935 switch (add->GetResultType()) {
2936 case DataType::Type::kInt32: {
2937 locations->SetInAt(0, Location::RequiresRegister());
2938 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2939 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2940 break;
2941 }
2942
2943 case DataType::Type::kInt64: {
2944 locations->SetInAt(0, Location::RequiresRegister());
2945 locations->SetInAt(1, Location::Any());
2946 locations->SetOut(Location::SameAsFirstInput());
2947 break;
2948 }
2949
2950 case DataType::Type::kFloat32:
2951 case DataType::Type::kFloat64: {
2952 locations->SetInAt(0, Location::RequiresFpuRegister());
2953 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
2954 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
2955 } else if (add->InputAt(1)->IsConstant()) {
2956 locations->SetInAt(1, Location::RequiresFpuRegister());
2957 } else {
2958 locations->SetInAt(1, Location::Any());
2959 }
2960 locations->SetOut(Location::SameAsFirstInput());
2961 break;
2962 }
2963
2964 default:
2965 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2966 break;
2967 }
2968 }
2969
VisitAdd(HAdd * add)2970 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
2971 LocationSummary* locations = add->GetLocations();
2972 Location first = locations->InAt(0);
2973 Location second = locations->InAt(1);
2974 Location out = locations->Out();
2975
2976 switch (add->GetResultType()) {
2977 case DataType::Type::kInt32: {
2978 if (second.IsRegister()) {
2979 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2980 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
2981 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2982 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
2983 } else {
2984 __ leal(out.AsRegister<Register>(), Address(
2985 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
2986 }
2987 } else if (second.IsConstant()) {
2988 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
2989 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2990 __ addl(out.AsRegister<Register>(), Immediate(value));
2991 } else {
2992 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
2993 }
2994 } else {
2995 DCHECK(first.Equals(locations->Out()));
2996 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
2997 }
2998 break;
2999 }
3000
3001 case DataType::Type::kInt64: {
3002 if (second.IsRegisterPair()) {
3003 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3004 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3005 } else if (second.IsDoubleStackSlot()) {
3006 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3007 __ adcl(first.AsRegisterPairHigh<Register>(),
3008 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3009 } else {
3010 DCHECK(second.IsConstant()) << second;
3011 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3012 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3013 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3014 }
3015 break;
3016 }
3017
3018 case DataType::Type::kFloat32: {
3019 if (second.IsFpuRegister()) {
3020 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3021 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3022 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3023 DCHECK(const_area->IsEmittedAtUseSite());
3024 __ addss(first.AsFpuRegister<XmmRegister>(),
3025 codegen_->LiteralFloatAddress(
3026 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3027 const_area->GetBaseMethodAddress(),
3028 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3029 } else {
3030 DCHECK(second.IsStackSlot());
3031 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3032 }
3033 break;
3034 }
3035
3036 case DataType::Type::kFloat64: {
3037 if (second.IsFpuRegister()) {
3038 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3039 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3040 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3041 DCHECK(const_area->IsEmittedAtUseSite());
3042 __ addsd(first.AsFpuRegister<XmmRegister>(),
3043 codegen_->LiteralDoubleAddress(
3044 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3045 const_area->GetBaseMethodAddress(),
3046 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3047 } else {
3048 DCHECK(second.IsDoubleStackSlot());
3049 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3050 }
3051 break;
3052 }
3053
3054 default:
3055 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3056 }
3057 }
3058
VisitSub(HSub * sub)3059 void LocationsBuilderX86::VisitSub(HSub* sub) {
3060 LocationSummary* locations =
3061 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3062 switch (sub->GetResultType()) {
3063 case DataType::Type::kInt32:
3064 case DataType::Type::kInt64: {
3065 locations->SetInAt(0, Location::RequiresRegister());
3066 locations->SetInAt(1, Location::Any());
3067 locations->SetOut(Location::SameAsFirstInput());
3068 break;
3069 }
3070 case DataType::Type::kFloat32:
3071 case DataType::Type::kFloat64: {
3072 locations->SetInAt(0, Location::RequiresFpuRegister());
3073 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3074 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3075 } else if (sub->InputAt(1)->IsConstant()) {
3076 locations->SetInAt(1, Location::RequiresFpuRegister());
3077 } else {
3078 locations->SetInAt(1, Location::Any());
3079 }
3080 locations->SetOut(Location::SameAsFirstInput());
3081 break;
3082 }
3083
3084 default:
3085 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3086 }
3087 }
3088
VisitSub(HSub * sub)3089 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3090 LocationSummary* locations = sub->GetLocations();
3091 Location first = locations->InAt(0);
3092 Location second = locations->InAt(1);
3093 DCHECK(first.Equals(locations->Out()));
3094 switch (sub->GetResultType()) {
3095 case DataType::Type::kInt32: {
3096 if (second.IsRegister()) {
3097 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3098 } else if (second.IsConstant()) {
3099 __ subl(first.AsRegister<Register>(),
3100 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3101 } else {
3102 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3103 }
3104 break;
3105 }
3106
3107 case DataType::Type::kInt64: {
3108 if (second.IsRegisterPair()) {
3109 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3110 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3111 } else if (second.IsDoubleStackSlot()) {
3112 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3113 __ sbbl(first.AsRegisterPairHigh<Register>(),
3114 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3115 } else {
3116 DCHECK(second.IsConstant()) << second;
3117 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3118 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3119 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3120 }
3121 break;
3122 }
3123
3124 case DataType::Type::kFloat32: {
3125 if (second.IsFpuRegister()) {
3126 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3127 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3128 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3129 DCHECK(const_area->IsEmittedAtUseSite());
3130 __ subss(first.AsFpuRegister<XmmRegister>(),
3131 codegen_->LiteralFloatAddress(
3132 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3133 const_area->GetBaseMethodAddress(),
3134 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3135 } else {
3136 DCHECK(second.IsStackSlot());
3137 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3138 }
3139 break;
3140 }
3141
3142 case DataType::Type::kFloat64: {
3143 if (second.IsFpuRegister()) {
3144 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3145 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3146 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3147 DCHECK(const_area->IsEmittedAtUseSite());
3148 __ subsd(first.AsFpuRegister<XmmRegister>(),
3149 codegen_->LiteralDoubleAddress(
3150 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3151 const_area->GetBaseMethodAddress(),
3152 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3153 } else {
3154 DCHECK(second.IsDoubleStackSlot());
3155 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3156 }
3157 break;
3158 }
3159
3160 default:
3161 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3162 }
3163 }
3164
VisitMul(HMul * mul)3165 void LocationsBuilderX86::VisitMul(HMul* mul) {
3166 LocationSummary* locations =
3167 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3168 switch (mul->GetResultType()) {
3169 case DataType::Type::kInt32:
3170 locations->SetInAt(0, Location::RequiresRegister());
3171 locations->SetInAt(1, Location::Any());
3172 if (mul->InputAt(1)->IsIntConstant()) {
3173 // Can use 3 operand multiply.
3174 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3175 } else {
3176 locations->SetOut(Location::SameAsFirstInput());
3177 }
3178 break;
3179 case DataType::Type::kInt64: {
3180 locations->SetInAt(0, Location::RequiresRegister());
3181 locations->SetInAt(1, Location::Any());
3182 locations->SetOut(Location::SameAsFirstInput());
3183 // Needed for imul on 32bits with 64bits output.
3184 locations->AddTemp(Location::RegisterLocation(EAX));
3185 locations->AddTemp(Location::RegisterLocation(EDX));
3186 break;
3187 }
3188 case DataType::Type::kFloat32:
3189 case DataType::Type::kFloat64: {
3190 locations->SetInAt(0, Location::RequiresFpuRegister());
3191 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3192 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3193 } else if (mul->InputAt(1)->IsConstant()) {
3194 locations->SetInAt(1, Location::RequiresFpuRegister());
3195 } else {
3196 locations->SetInAt(1, Location::Any());
3197 }
3198 locations->SetOut(Location::SameAsFirstInput());
3199 break;
3200 }
3201
3202 default:
3203 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3204 }
3205 }
3206
VisitMul(HMul * mul)3207 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3208 LocationSummary* locations = mul->GetLocations();
3209 Location first = locations->InAt(0);
3210 Location second = locations->InAt(1);
3211 Location out = locations->Out();
3212
3213 switch (mul->GetResultType()) {
3214 case DataType::Type::kInt32:
3215 // The constant may have ended up in a register, so test explicitly to avoid
3216 // problems where the output may not be the same as the first operand.
3217 if (mul->InputAt(1)->IsIntConstant()) {
3218 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3219 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3220 } else if (second.IsRegister()) {
3221 DCHECK(first.Equals(out));
3222 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3223 } else {
3224 DCHECK(second.IsStackSlot());
3225 DCHECK(first.Equals(out));
3226 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3227 }
3228 break;
3229
3230 case DataType::Type::kInt64: {
3231 Register in1_hi = first.AsRegisterPairHigh<Register>();
3232 Register in1_lo = first.AsRegisterPairLow<Register>();
3233 Register eax = locations->GetTemp(0).AsRegister<Register>();
3234 Register edx = locations->GetTemp(1).AsRegister<Register>();
3235
3236 DCHECK_EQ(EAX, eax);
3237 DCHECK_EQ(EDX, edx);
3238
3239 // input: in1 - 64 bits, in2 - 64 bits.
3240 // output: in1
3241 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3242 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3243 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3244 if (second.IsConstant()) {
3245 DCHECK(second.GetConstant()->IsLongConstant());
3246
3247 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3248 int32_t low_value = Low32Bits(value);
3249 int32_t high_value = High32Bits(value);
3250 Immediate low(low_value);
3251 Immediate high(high_value);
3252
3253 __ movl(eax, high);
3254 // eax <- in1.lo * in2.hi
3255 __ imull(eax, in1_lo);
3256 // in1.hi <- in1.hi * in2.lo
3257 __ imull(in1_hi, low);
3258 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3259 __ addl(in1_hi, eax);
3260 // move in2_lo to eax to prepare for double precision
3261 __ movl(eax, low);
3262 // edx:eax <- in1.lo * in2.lo
3263 __ mull(in1_lo);
3264 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3265 __ addl(in1_hi, edx);
3266 // in1.lo <- (in1.lo * in2.lo)[31:0];
3267 __ movl(in1_lo, eax);
3268 } else if (second.IsRegisterPair()) {
3269 Register in2_hi = second.AsRegisterPairHigh<Register>();
3270 Register in2_lo = second.AsRegisterPairLow<Register>();
3271
3272 __ movl(eax, in2_hi);
3273 // eax <- in1.lo * in2.hi
3274 __ imull(eax, in1_lo);
3275 // in1.hi <- in1.hi * in2.lo
3276 __ imull(in1_hi, in2_lo);
3277 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3278 __ addl(in1_hi, eax);
3279 // move in1_lo to eax to prepare for double precision
3280 __ movl(eax, in1_lo);
3281 // edx:eax <- in1.lo * in2.lo
3282 __ mull(in2_lo);
3283 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3284 __ addl(in1_hi, edx);
3285 // in1.lo <- (in1.lo * in2.lo)[31:0];
3286 __ movl(in1_lo, eax);
3287 } else {
3288 DCHECK(second.IsDoubleStackSlot()) << second;
3289 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3290 Address in2_lo(ESP, second.GetStackIndex());
3291
3292 __ movl(eax, in2_hi);
3293 // eax <- in1.lo * in2.hi
3294 __ imull(eax, in1_lo);
3295 // in1.hi <- in1.hi * in2.lo
3296 __ imull(in1_hi, in2_lo);
3297 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3298 __ addl(in1_hi, eax);
3299 // move in1_lo to eax to prepare for double precision
3300 __ movl(eax, in1_lo);
3301 // edx:eax <- in1.lo * in2.lo
3302 __ mull(in2_lo);
3303 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3304 __ addl(in1_hi, edx);
3305 // in1.lo <- (in1.lo * in2.lo)[31:0];
3306 __ movl(in1_lo, eax);
3307 }
3308
3309 break;
3310 }
3311
3312 case DataType::Type::kFloat32: {
3313 DCHECK(first.Equals(locations->Out()));
3314 if (second.IsFpuRegister()) {
3315 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3316 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3317 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3318 DCHECK(const_area->IsEmittedAtUseSite());
3319 __ mulss(first.AsFpuRegister<XmmRegister>(),
3320 codegen_->LiteralFloatAddress(
3321 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3322 const_area->GetBaseMethodAddress(),
3323 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3324 } else {
3325 DCHECK(second.IsStackSlot());
3326 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3327 }
3328 break;
3329 }
3330
3331 case DataType::Type::kFloat64: {
3332 DCHECK(first.Equals(locations->Out()));
3333 if (second.IsFpuRegister()) {
3334 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3335 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3336 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3337 DCHECK(const_area->IsEmittedAtUseSite());
3338 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3339 codegen_->LiteralDoubleAddress(
3340 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3341 const_area->GetBaseMethodAddress(),
3342 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3343 } else {
3344 DCHECK(second.IsDoubleStackSlot());
3345 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3346 }
3347 break;
3348 }
3349
3350 default:
3351 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3352 }
3353 }
3354
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3355 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3356 uint32_t temp_offset,
3357 uint32_t stack_adjustment,
3358 bool is_fp,
3359 bool is_wide) {
3360 if (source.IsStackSlot()) {
3361 DCHECK(!is_wide);
3362 if (is_fp) {
3363 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3364 } else {
3365 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3366 }
3367 } else if (source.IsDoubleStackSlot()) {
3368 DCHECK(is_wide);
3369 if (is_fp) {
3370 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3371 } else {
3372 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3373 }
3374 } else {
3375 // Write the value to the temporary location on the stack and load to FP stack.
3376 if (!is_wide) {
3377 Location stack_temp = Location::StackSlot(temp_offset);
3378 codegen_->Move32(stack_temp, source);
3379 if (is_fp) {
3380 __ flds(Address(ESP, temp_offset));
3381 } else {
3382 __ filds(Address(ESP, temp_offset));
3383 }
3384 } else {
3385 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3386 codegen_->Move64(stack_temp, source);
3387 if (is_fp) {
3388 __ fldl(Address(ESP, temp_offset));
3389 } else {
3390 __ fildl(Address(ESP, temp_offset));
3391 }
3392 }
3393 }
3394 }
3395
GenerateRemFP(HRem * rem)3396 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3397 DataType::Type type = rem->GetResultType();
3398 bool is_float = type == DataType::Type::kFloat32;
3399 size_t elem_size = DataType::Size(type);
3400 LocationSummary* locations = rem->GetLocations();
3401 Location first = locations->InAt(0);
3402 Location second = locations->InAt(1);
3403 Location out = locations->Out();
3404
3405 // Create stack space for 2 elements.
3406 // TODO: enhance register allocator to ask for stack temporaries.
3407 __ subl(ESP, Immediate(2 * elem_size));
3408
3409 // Load the values to the FP stack in reverse order, using temporaries if needed.
3410 const bool is_wide = !is_float;
3411 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
3412 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
3413
3414 // Loop doing FPREM until we stabilize.
3415 NearLabel retry;
3416 __ Bind(&retry);
3417 __ fprem();
3418
3419 // Move FP status to AX.
3420 __ fstsw();
3421
3422 // And see if the argument reduction is complete. This is signaled by the
3423 // C2 FPU flag bit set to 0.
3424 __ andl(EAX, Immediate(kC2ConditionMask));
3425 __ j(kNotEqual, &retry);
3426
3427 // We have settled on the final value. Retrieve it into an XMM register.
3428 // Store FP top of stack to real stack.
3429 if (is_float) {
3430 __ fsts(Address(ESP, 0));
3431 } else {
3432 __ fstl(Address(ESP, 0));
3433 }
3434
3435 // Pop the 2 items from the FP stack.
3436 __ fucompp();
3437
3438 // Load the value from the stack into an XMM register.
3439 DCHECK(out.IsFpuRegister()) << out;
3440 if (is_float) {
3441 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3442 } else {
3443 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3444 }
3445
3446 // And remove the temporary stack space we allocated.
3447 __ addl(ESP, Immediate(2 * elem_size));
3448 }
3449
3450
DivRemOneOrMinusOne(HBinaryOperation * instruction)3451 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3452 DCHECK(instruction->IsDiv() || instruction->IsRem());
3453
3454 LocationSummary* locations = instruction->GetLocations();
3455 DCHECK(locations->InAt(1).IsConstant());
3456 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3457
3458 Register out_register = locations->Out().AsRegister<Register>();
3459 Register input_register = locations->InAt(0).AsRegister<Register>();
3460 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3461
3462 DCHECK(imm == 1 || imm == -1);
3463
3464 if (instruction->IsRem()) {
3465 __ xorl(out_register, out_register);
3466 } else {
3467 __ movl(out_register, input_register);
3468 if (imm == -1) {
3469 __ negl(out_register);
3470 }
3471 }
3472 }
3473
3474
DivByPowerOfTwo(HDiv * instruction)3475 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3476 LocationSummary* locations = instruction->GetLocations();
3477
3478 Register out_register = locations->Out().AsRegister<Register>();
3479 Register input_register = locations->InAt(0).AsRegister<Register>();
3480 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3481 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3482 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3483
3484 Register num = locations->GetTemp(0).AsRegister<Register>();
3485
3486 __ leal(num, Address(input_register, abs_imm - 1));
3487 __ testl(input_register, input_register);
3488 __ cmovl(kGreaterEqual, num, input_register);
3489 int shift = CTZ(imm);
3490 __ sarl(num, Immediate(shift));
3491
3492 if (imm < 0) {
3493 __ negl(num);
3494 }
3495
3496 __ movl(out_register, num);
3497 }
3498
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3499 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3500 DCHECK(instruction->IsDiv() || instruction->IsRem());
3501
3502 LocationSummary* locations = instruction->GetLocations();
3503 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3504
3505 Register eax = locations->InAt(0).AsRegister<Register>();
3506 Register out = locations->Out().AsRegister<Register>();
3507 Register num;
3508 Register edx;
3509
3510 if (instruction->IsDiv()) {
3511 edx = locations->GetTemp(0).AsRegister<Register>();
3512 num = locations->GetTemp(1).AsRegister<Register>();
3513 } else {
3514 edx = locations->Out().AsRegister<Register>();
3515 num = locations->GetTemp(0).AsRegister<Register>();
3516 }
3517
3518 DCHECK_EQ(EAX, eax);
3519 DCHECK_EQ(EDX, edx);
3520 if (instruction->IsDiv()) {
3521 DCHECK_EQ(EAX, out);
3522 } else {
3523 DCHECK_EQ(EDX, out);
3524 }
3525
3526 int64_t magic;
3527 int shift;
3528 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3529
3530 // Save the numerator.
3531 __ movl(num, eax);
3532
3533 // EAX = magic
3534 __ movl(eax, Immediate(magic));
3535
3536 // EDX:EAX = magic * numerator
3537 __ imull(num);
3538
3539 if (imm > 0 && magic < 0) {
3540 // EDX += num
3541 __ addl(edx, num);
3542 } else if (imm < 0 && magic > 0) {
3543 __ subl(edx, num);
3544 }
3545
3546 // Shift if needed.
3547 if (shift != 0) {
3548 __ sarl(edx, Immediate(shift));
3549 }
3550
3551 // EDX += 1 if EDX < 0
3552 __ movl(eax, edx);
3553 __ shrl(edx, Immediate(31));
3554 __ addl(edx, eax);
3555
3556 if (instruction->IsRem()) {
3557 __ movl(eax, num);
3558 __ imull(edx, Immediate(imm));
3559 __ subl(eax, edx);
3560 __ movl(edx, eax);
3561 } else {
3562 __ movl(eax, edx);
3563 }
3564 }
3565
GenerateDivRemIntegral(HBinaryOperation * instruction)3566 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3567 DCHECK(instruction->IsDiv() || instruction->IsRem());
3568
3569 LocationSummary* locations = instruction->GetLocations();
3570 Location out = locations->Out();
3571 Location first = locations->InAt(0);
3572 Location second = locations->InAt(1);
3573 bool is_div = instruction->IsDiv();
3574
3575 switch (instruction->GetResultType()) {
3576 case DataType::Type::kInt32: {
3577 DCHECK_EQ(EAX, first.AsRegister<Register>());
3578 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3579
3580 if (second.IsConstant()) {
3581 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3582
3583 if (imm == 0) {
3584 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3585 } else if (imm == 1 || imm == -1) {
3586 DivRemOneOrMinusOne(instruction);
3587 } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
3588 DivByPowerOfTwo(instruction->AsDiv());
3589 } else {
3590 DCHECK(imm <= -2 || imm >= 2);
3591 GenerateDivRemWithAnyConstant(instruction);
3592 }
3593 } else {
3594 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3595 instruction, out.AsRegister<Register>(), is_div);
3596 codegen_->AddSlowPath(slow_path);
3597
3598 Register second_reg = second.AsRegister<Register>();
3599 // 0x80000000/-1 triggers an arithmetic exception!
3600 // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
3601 // it's safe to just use negl instead of more complex comparisons.
3602
3603 __ cmpl(second_reg, Immediate(-1));
3604 __ j(kEqual, slow_path->GetEntryLabel());
3605
3606 // edx:eax <- sign-extended of eax
3607 __ cdq();
3608 // eax = quotient, edx = remainder
3609 __ idivl(second_reg);
3610 __ Bind(slow_path->GetExitLabel());
3611 }
3612 break;
3613 }
3614
3615 case DataType::Type::kInt64: {
3616 InvokeRuntimeCallingConvention calling_convention;
3617 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3618 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3619 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3620 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3621 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3622 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3623
3624 if (is_div) {
3625 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3626 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3627 } else {
3628 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3629 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3630 }
3631 break;
3632 }
3633
3634 default:
3635 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3636 }
3637 }
3638
VisitDiv(HDiv * div)3639 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3640 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3641 ? LocationSummary::kCallOnMainOnly
3642 : LocationSummary::kNoCall;
3643 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3644
3645 switch (div->GetResultType()) {
3646 case DataType::Type::kInt32: {
3647 locations->SetInAt(0, Location::RegisterLocation(EAX));
3648 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3649 locations->SetOut(Location::SameAsFirstInput());
3650 // Intel uses edx:eax as the dividend.
3651 locations->AddTemp(Location::RegisterLocation(EDX));
3652 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3653 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3654 // output and request another temp.
3655 if (div->InputAt(1)->IsIntConstant()) {
3656 locations->AddTemp(Location::RequiresRegister());
3657 }
3658 break;
3659 }
3660 case DataType::Type::kInt64: {
3661 InvokeRuntimeCallingConvention calling_convention;
3662 locations->SetInAt(0, Location::RegisterPairLocation(
3663 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3664 locations->SetInAt(1, Location::RegisterPairLocation(
3665 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3666 // Runtime helper puts the result in EAX, EDX.
3667 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3668 break;
3669 }
3670 case DataType::Type::kFloat32:
3671 case DataType::Type::kFloat64: {
3672 locations->SetInAt(0, Location::RequiresFpuRegister());
3673 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3674 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3675 } else if (div->InputAt(1)->IsConstant()) {
3676 locations->SetInAt(1, Location::RequiresFpuRegister());
3677 } else {
3678 locations->SetInAt(1, Location::Any());
3679 }
3680 locations->SetOut(Location::SameAsFirstInput());
3681 break;
3682 }
3683
3684 default:
3685 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3686 }
3687 }
3688
VisitDiv(HDiv * div)3689 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3690 LocationSummary* locations = div->GetLocations();
3691 Location first = locations->InAt(0);
3692 Location second = locations->InAt(1);
3693
3694 switch (div->GetResultType()) {
3695 case DataType::Type::kInt32:
3696 case DataType::Type::kInt64: {
3697 GenerateDivRemIntegral(div);
3698 break;
3699 }
3700
3701 case DataType::Type::kFloat32: {
3702 if (second.IsFpuRegister()) {
3703 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3704 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3705 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3706 DCHECK(const_area->IsEmittedAtUseSite());
3707 __ divss(first.AsFpuRegister<XmmRegister>(),
3708 codegen_->LiteralFloatAddress(
3709 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3710 const_area->GetBaseMethodAddress(),
3711 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3712 } else {
3713 DCHECK(second.IsStackSlot());
3714 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3715 }
3716 break;
3717 }
3718
3719 case DataType::Type::kFloat64: {
3720 if (second.IsFpuRegister()) {
3721 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3722 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3723 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3724 DCHECK(const_area->IsEmittedAtUseSite());
3725 __ divsd(first.AsFpuRegister<XmmRegister>(),
3726 codegen_->LiteralDoubleAddress(
3727 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3728 const_area->GetBaseMethodAddress(),
3729 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3730 } else {
3731 DCHECK(second.IsDoubleStackSlot());
3732 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3733 }
3734 break;
3735 }
3736
3737 default:
3738 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3739 }
3740 }
3741
VisitRem(HRem * rem)3742 void LocationsBuilderX86::VisitRem(HRem* rem) {
3743 DataType::Type type = rem->GetResultType();
3744
3745 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3746 ? LocationSummary::kCallOnMainOnly
3747 : LocationSummary::kNoCall;
3748 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3749
3750 switch (type) {
3751 case DataType::Type::kInt32: {
3752 locations->SetInAt(0, Location::RegisterLocation(EAX));
3753 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3754 locations->SetOut(Location::RegisterLocation(EDX));
3755 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3756 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3757 // output and request another temp.
3758 if (rem->InputAt(1)->IsIntConstant()) {
3759 locations->AddTemp(Location::RequiresRegister());
3760 }
3761 break;
3762 }
3763 case DataType::Type::kInt64: {
3764 InvokeRuntimeCallingConvention calling_convention;
3765 locations->SetInAt(0, Location::RegisterPairLocation(
3766 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3767 locations->SetInAt(1, Location::RegisterPairLocation(
3768 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3769 // Runtime helper puts the result in EAX, EDX.
3770 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3771 break;
3772 }
3773 case DataType::Type::kFloat64:
3774 case DataType::Type::kFloat32: {
3775 locations->SetInAt(0, Location::Any());
3776 locations->SetInAt(1, Location::Any());
3777 locations->SetOut(Location::RequiresFpuRegister());
3778 locations->AddTemp(Location::RegisterLocation(EAX));
3779 break;
3780 }
3781
3782 default:
3783 LOG(FATAL) << "Unexpected rem type " << type;
3784 }
3785 }
3786
VisitRem(HRem * rem)3787 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3788 DataType::Type type = rem->GetResultType();
3789 switch (type) {
3790 case DataType::Type::kInt32:
3791 case DataType::Type::kInt64: {
3792 GenerateDivRemIntegral(rem);
3793 break;
3794 }
3795 case DataType::Type::kFloat32:
3796 case DataType::Type::kFloat64: {
3797 GenerateRemFP(rem);
3798 break;
3799 }
3800 default:
3801 LOG(FATAL) << "Unexpected rem type " << type;
3802 }
3803 }
3804
VisitDivZeroCheck(HDivZeroCheck * instruction)3805 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3806 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3807 switch (instruction->GetType()) {
3808 case DataType::Type::kBool:
3809 case DataType::Type::kUint8:
3810 case DataType::Type::kInt8:
3811 case DataType::Type::kUint16:
3812 case DataType::Type::kInt16:
3813 case DataType::Type::kInt32: {
3814 locations->SetInAt(0, Location::Any());
3815 break;
3816 }
3817 case DataType::Type::kInt64: {
3818 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3819 if (!instruction->IsConstant()) {
3820 locations->AddTemp(Location::RequiresRegister());
3821 }
3822 break;
3823 }
3824 default:
3825 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3826 }
3827 }
3828
VisitDivZeroCheck(HDivZeroCheck * instruction)3829 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3830 SlowPathCode* slow_path =
3831 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
3832 codegen_->AddSlowPath(slow_path);
3833
3834 LocationSummary* locations = instruction->GetLocations();
3835 Location value = locations->InAt(0);
3836
3837 switch (instruction->GetType()) {
3838 case DataType::Type::kBool:
3839 case DataType::Type::kUint8:
3840 case DataType::Type::kInt8:
3841 case DataType::Type::kUint16:
3842 case DataType::Type::kInt16:
3843 case DataType::Type::kInt32: {
3844 if (value.IsRegister()) {
3845 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
3846 __ j(kEqual, slow_path->GetEntryLabel());
3847 } else if (value.IsStackSlot()) {
3848 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
3849 __ j(kEqual, slow_path->GetEntryLabel());
3850 } else {
3851 DCHECK(value.IsConstant()) << value;
3852 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3853 __ jmp(slow_path->GetEntryLabel());
3854 }
3855 }
3856 break;
3857 }
3858 case DataType::Type::kInt64: {
3859 if (value.IsRegisterPair()) {
3860 Register temp = locations->GetTemp(0).AsRegister<Register>();
3861 __ movl(temp, value.AsRegisterPairLow<Register>());
3862 __ orl(temp, value.AsRegisterPairHigh<Register>());
3863 __ j(kEqual, slow_path->GetEntryLabel());
3864 } else {
3865 DCHECK(value.IsConstant()) << value;
3866 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3867 __ jmp(slow_path->GetEntryLabel());
3868 }
3869 }
3870 break;
3871 }
3872 default:
3873 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
3874 }
3875 }
3876
HandleShift(HBinaryOperation * op)3877 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
3878 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3879
3880 LocationSummary* locations =
3881 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
3882
3883 switch (op->GetResultType()) {
3884 case DataType::Type::kInt32:
3885 case DataType::Type::kInt64: {
3886 // Can't have Location::Any() and output SameAsFirstInput()
3887 locations->SetInAt(0, Location::RequiresRegister());
3888 // The shift count needs to be in CL or a constant.
3889 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
3890 locations->SetOut(Location::SameAsFirstInput());
3891 break;
3892 }
3893 default:
3894 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3895 }
3896 }
3897
HandleShift(HBinaryOperation * op)3898 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
3899 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3900
3901 LocationSummary* locations = op->GetLocations();
3902 Location first = locations->InAt(0);
3903 Location second = locations->InAt(1);
3904 DCHECK(first.Equals(locations->Out()));
3905
3906 switch (op->GetResultType()) {
3907 case DataType::Type::kInt32: {
3908 DCHECK(first.IsRegister());
3909 Register first_reg = first.AsRegister<Register>();
3910 if (second.IsRegister()) {
3911 Register second_reg = second.AsRegister<Register>();
3912 DCHECK_EQ(ECX, second_reg);
3913 if (op->IsShl()) {
3914 __ shll(first_reg, second_reg);
3915 } else if (op->IsShr()) {
3916 __ sarl(first_reg, second_reg);
3917 } else {
3918 __ shrl(first_reg, second_reg);
3919 }
3920 } else {
3921 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
3922 if (shift == 0) {
3923 return;
3924 }
3925 Immediate imm(shift);
3926 if (op->IsShl()) {
3927 __ shll(first_reg, imm);
3928 } else if (op->IsShr()) {
3929 __ sarl(first_reg, imm);
3930 } else {
3931 __ shrl(first_reg, imm);
3932 }
3933 }
3934 break;
3935 }
3936 case DataType::Type::kInt64: {
3937 if (second.IsRegister()) {
3938 Register second_reg = second.AsRegister<Register>();
3939 DCHECK_EQ(ECX, second_reg);
3940 if (op->IsShl()) {
3941 GenerateShlLong(first, second_reg);
3942 } else if (op->IsShr()) {
3943 GenerateShrLong(first, second_reg);
3944 } else {
3945 GenerateUShrLong(first, second_reg);
3946 }
3947 } else {
3948 // Shift by a constant.
3949 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
3950 // Nothing to do if the shift is 0, as the input is already the output.
3951 if (shift != 0) {
3952 if (op->IsShl()) {
3953 GenerateShlLong(first, shift);
3954 } else if (op->IsShr()) {
3955 GenerateShrLong(first, shift);
3956 } else {
3957 GenerateUShrLong(first, shift);
3958 }
3959 }
3960 }
3961 break;
3962 }
3963 default:
3964 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
3965 }
3966 }
3967
GenerateShlLong(const Location & loc,int shift)3968 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
3969 Register low = loc.AsRegisterPairLow<Register>();
3970 Register high = loc.AsRegisterPairHigh<Register>();
3971 if (shift == 1) {
3972 // This is just an addition.
3973 __ addl(low, low);
3974 __ adcl(high, high);
3975 } else if (shift == 32) {
3976 // Shift by 32 is easy. High gets low, and low gets 0.
3977 codegen_->EmitParallelMoves(
3978 loc.ToLow(),
3979 loc.ToHigh(),
3980 DataType::Type::kInt32,
3981 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
3982 loc.ToLow(),
3983 DataType::Type::kInt32);
3984 } else if (shift > 32) {
3985 // Low part becomes 0. High part is low part << (shift-32).
3986 __ movl(high, low);
3987 __ shll(high, Immediate(shift - 32));
3988 __ xorl(low, low);
3989 } else {
3990 // Between 1 and 31.
3991 __ shld(high, low, Immediate(shift));
3992 __ shll(low, Immediate(shift));
3993 }
3994 }
3995
GenerateShlLong(const Location & loc,Register shifter)3996 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
3997 NearLabel done;
3998 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
3999 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4000 __ testl(shifter, Immediate(32));
4001 __ j(kEqual, &done);
4002 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4003 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4004 __ Bind(&done);
4005 }
4006
GenerateShrLong(const Location & loc,int shift)4007 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4008 Register low = loc.AsRegisterPairLow<Register>();
4009 Register high = loc.AsRegisterPairHigh<Register>();
4010 if (shift == 32) {
4011 // Need to copy the sign.
4012 DCHECK_NE(low, high);
4013 __ movl(low, high);
4014 __ sarl(high, Immediate(31));
4015 } else if (shift > 32) {
4016 DCHECK_NE(low, high);
4017 // High part becomes sign. Low part is shifted by shift - 32.
4018 __ movl(low, high);
4019 __ sarl(high, Immediate(31));
4020 __ sarl(low, Immediate(shift - 32));
4021 } else {
4022 // Between 1 and 31.
4023 __ shrd(low, high, Immediate(shift));
4024 __ sarl(high, Immediate(shift));
4025 }
4026 }
4027
GenerateShrLong(const Location & loc,Register shifter)4028 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4029 NearLabel done;
4030 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4031 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4032 __ testl(shifter, Immediate(32));
4033 __ j(kEqual, &done);
4034 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4035 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4036 __ Bind(&done);
4037 }
4038
GenerateUShrLong(const Location & loc,int shift)4039 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4040 Register low = loc.AsRegisterPairLow<Register>();
4041 Register high = loc.AsRegisterPairHigh<Register>();
4042 if (shift == 32) {
4043 // Shift by 32 is easy. Low gets high, and high gets 0.
4044 codegen_->EmitParallelMoves(
4045 loc.ToHigh(),
4046 loc.ToLow(),
4047 DataType::Type::kInt32,
4048 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4049 loc.ToHigh(),
4050 DataType::Type::kInt32);
4051 } else if (shift > 32) {
4052 // Low part is high >> (shift - 32). High part becomes 0.
4053 __ movl(low, high);
4054 __ shrl(low, Immediate(shift - 32));
4055 __ xorl(high, high);
4056 } else {
4057 // Between 1 and 31.
4058 __ shrd(low, high, Immediate(shift));
4059 __ shrl(high, Immediate(shift));
4060 }
4061 }
4062
GenerateUShrLong(const Location & loc,Register shifter)4063 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4064 NearLabel done;
4065 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4066 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4067 __ testl(shifter, Immediate(32));
4068 __ j(kEqual, &done);
4069 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4070 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4071 __ Bind(&done);
4072 }
4073
VisitRor(HRor * ror)4074 void LocationsBuilderX86::VisitRor(HRor* ror) {
4075 LocationSummary* locations =
4076 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4077
4078 switch (ror->GetResultType()) {
4079 case DataType::Type::kInt64:
4080 // Add the temporary needed.
4081 locations->AddTemp(Location::RequiresRegister());
4082 FALLTHROUGH_INTENDED;
4083 case DataType::Type::kInt32:
4084 locations->SetInAt(0, Location::RequiresRegister());
4085 // The shift count needs to be in CL (unless it is a constant).
4086 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4087 locations->SetOut(Location::SameAsFirstInput());
4088 break;
4089 default:
4090 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4091 UNREACHABLE();
4092 }
4093 }
4094
VisitRor(HRor * ror)4095 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4096 LocationSummary* locations = ror->GetLocations();
4097 Location first = locations->InAt(0);
4098 Location second = locations->InAt(1);
4099
4100 if (ror->GetResultType() == DataType::Type::kInt32) {
4101 Register first_reg = first.AsRegister<Register>();
4102 if (second.IsRegister()) {
4103 Register second_reg = second.AsRegister<Register>();
4104 __ rorl(first_reg, second_reg);
4105 } else {
4106 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4107 __ rorl(first_reg, imm);
4108 }
4109 return;
4110 }
4111
4112 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4113 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4114 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4115 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4116 if (second.IsRegister()) {
4117 Register second_reg = second.AsRegister<Register>();
4118 DCHECK_EQ(second_reg, ECX);
4119 __ movl(temp_reg, first_reg_hi);
4120 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4121 __ shrd(first_reg_lo, temp_reg, second_reg);
4122 __ movl(temp_reg, first_reg_hi);
4123 __ testl(second_reg, Immediate(32));
4124 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4125 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4126 } else {
4127 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4128 if (shift_amt == 0) {
4129 // Already fine.
4130 return;
4131 }
4132 if (shift_amt == 32) {
4133 // Just swap.
4134 __ movl(temp_reg, first_reg_lo);
4135 __ movl(first_reg_lo, first_reg_hi);
4136 __ movl(first_reg_hi, temp_reg);
4137 return;
4138 }
4139
4140 Immediate imm(shift_amt);
4141 // Save the constents of the low value.
4142 __ movl(temp_reg, first_reg_lo);
4143
4144 // Shift right into low, feeding bits from high.
4145 __ shrd(first_reg_lo, first_reg_hi, imm);
4146
4147 // Shift right into high, feeding bits from the original low.
4148 __ shrd(first_reg_hi, temp_reg, imm);
4149
4150 // Swap if needed.
4151 if (shift_amt > 32) {
4152 __ movl(temp_reg, first_reg_lo);
4153 __ movl(first_reg_lo, first_reg_hi);
4154 __ movl(first_reg_hi, temp_reg);
4155 }
4156 }
4157 }
4158
// Location setup for HShl; forwards to HandleShift(), as do the HShr and HUShr visitors.
void LocationsBuilderX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}
4162
// Code generation for HShl; forwards to HandleShift(), as do the HShr and HUShr visitors.
void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}
4166
// Location setup for HShr; forwards to HandleShift(), as do the HShl and HUShr visitors.
void LocationsBuilderX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}
4170
// Code generation for HShr; forwards to HandleShift(), as do the HShl and HUShr visitors.
void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}
4174
// Location setup for HUShr; forwards to HandleShift(), as do the HShl and HShr visitors.
void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}
4178
// Code generation for HUShr; forwards to HandleShift(), as do the HShl and HShr visitors.
void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}
4182
VisitNewInstance(HNewInstance * instruction)4183 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4184 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4185 instruction, LocationSummary::kCallOnMainOnly);
4186 locations->SetOut(Location::RegisterLocation(EAX));
4187 if (instruction->IsStringAlloc()) {
4188 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4189 } else {
4190 InvokeRuntimeCallingConvention calling_convention;
4191 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4192 }
4193 }
4194
VisitNewInstance(HNewInstance * instruction)4195 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4196 // Note: if heap poisoning is enabled, the entry point takes cares
4197 // of poisoning the reference.
4198 if (instruction->IsStringAlloc()) {
4199 // String is allocated through StringFactory. Call NewEmptyString entry point.
4200 Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
4201 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
4202 __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
4203 __ call(Address(temp, code_offset.Int32Value()));
4204 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4205 } else {
4206 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4207 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4208 DCHECK(!codegen_->IsLeafMethod());
4209 }
4210 }
4211
VisitNewArray(HNewArray * instruction)4212 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4213 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4214 instruction, LocationSummary::kCallOnMainOnly);
4215 locations->SetOut(Location::RegisterLocation(EAX));
4216 InvokeRuntimeCallingConvention calling_convention;
4217 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4218 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4219 }
4220
VisitNewArray(HNewArray * instruction)4221 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4222 // Note: if heap poisoning is enabled, the entry point takes cares
4223 // of poisoning the reference.
4224 QuickEntrypointEnum entrypoint =
4225 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4226 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4227 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4228 DCHECK(!codegen_->IsLeafMethod());
4229 }
4230
VisitParameterValue(HParameterValue * instruction)4231 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4232 LocationSummary* locations =
4233 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4234 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4235 if (location.IsStackSlot()) {
4236 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4237 } else if (location.IsDoubleStackSlot()) {
4238 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4239 }
4240 locations->SetOut(location);
4241 }
4242
// Intentionally empty: no code is emitted for HParameterValue by this visitor.
void InstructionCodeGeneratorX86::VisitParameterValue(
    HParameterValue* instruction ATTRIBUTE_UNUSED) {
}
4246
VisitCurrentMethod(HCurrentMethod * instruction)4247 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4248 LocationSummary* locations =
4249 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4250 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4251 }
4252
// Intentionally empty: no code is emitted for HCurrentMethod by this visitor.
void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
}
4255
VisitClassTableGet(HClassTableGet * instruction)4256 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4257 LocationSummary* locations =
4258 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4259 locations->SetInAt(0, Location::RequiresRegister());
4260 locations->SetOut(Location::RequiresRegister());
4261 }
4262
VisitClassTableGet(HClassTableGet * instruction)4263 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4264 LocationSummary* locations = instruction->GetLocations();
4265 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4266 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4267 instruction->GetIndex(), kX86PointerSize).SizeValue();
4268 __ movl(locations->Out().AsRegister<Register>(),
4269 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4270 } else {
4271 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4272 instruction->GetIndex(), kX86PointerSize));
4273 __ movl(locations->Out().AsRegister<Register>(),
4274 Address(locations->InAt(0).AsRegister<Register>(),
4275 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4276 // temp = temp->GetImtEntryAt(method_offset);
4277 __ movl(locations->Out().AsRegister<Register>(),
4278 Address(locations->Out().AsRegister<Register>(), method_offset));
4279 }
4280 }
4281
VisitNot(HNot * not_)4282 void LocationsBuilderX86::VisitNot(HNot* not_) {
4283 LocationSummary* locations =
4284 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4285 locations->SetInAt(0, Location::RequiresRegister());
4286 locations->SetOut(Location::SameAsFirstInput());
4287 }
4288
VisitNot(HNot * not_)4289 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4290 LocationSummary* locations = not_->GetLocations();
4291 Location in = locations->InAt(0);
4292 Location out = locations->Out();
4293 DCHECK(in.Equals(out));
4294 switch (not_->GetResultType()) {
4295 case DataType::Type::kInt32:
4296 __ notl(out.AsRegister<Register>());
4297 break;
4298
4299 case DataType::Type::kInt64:
4300 __ notl(out.AsRegisterPairLow<Register>());
4301 __ notl(out.AsRegisterPairHigh<Register>());
4302 break;
4303
4304 default:
4305 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4306 }
4307 }
4308
VisitBooleanNot(HBooleanNot * bool_not)4309 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4310 LocationSummary* locations =
4311 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4312 locations->SetInAt(0, Location::RequiresRegister());
4313 locations->SetOut(Location::SameAsFirstInput());
4314 }
4315
VisitBooleanNot(HBooleanNot * bool_not)4316 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4317 LocationSummary* locations = bool_not->GetLocations();
4318 Location in = locations->InAt(0);
4319 Location out = locations->Out();
4320 DCHECK(in.Equals(out));
4321 __ xorl(out.AsRegister<Register>(), Immediate(1));
4322 }
4323
VisitCompare(HCompare * compare)4324 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4325 LocationSummary* locations =
4326 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4327 switch (compare->InputAt(0)->GetType()) {
4328 case DataType::Type::kBool:
4329 case DataType::Type::kUint8:
4330 case DataType::Type::kInt8:
4331 case DataType::Type::kUint16:
4332 case DataType::Type::kInt16:
4333 case DataType::Type::kInt32:
4334 case DataType::Type::kInt64: {
4335 locations->SetInAt(0, Location::RequiresRegister());
4336 locations->SetInAt(1, Location::Any());
4337 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4338 break;
4339 }
4340 case DataType::Type::kFloat32:
4341 case DataType::Type::kFloat64: {
4342 locations->SetInAt(0, Location::RequiresFpuRegister());
4343 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4344 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4345 } else if (compare->InputAt(1)->IsConstant()) {
4346 locations->SetInAt(1, Location::RequiresFpuRegister());
4347 } else {
4348 locations->SetInAt(1, Location::Any());
4349 }
4350 locations->SetOut(Location::RequiresRegister());
4351 break;
4352 }
4353 default:
4354 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4355 }
4356 }
4357
// Emits code for HCompare, materializing -1, 0 or 1 in the output register.
//
// Each case of the switch emits the comparison that sets the flags (and, for long and
// floating-point operands, early jumps to `less`/`greater`). The common tail after the
// switch turns the flags of the last comparison into the result, using `less_cond` as
// the "left is smaller" condition for that comparison.
void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
  LocationSummary* locations = compare->GetLocations();
  Register out = locations->Out().AsRegister<Register>();
  Location left = locations->InAt(0);
  Location right = locations->InAt(1);

  NearLabel less, greater, done;
  // Condition meaning "left < right" for the final comparison; signed by default.
  Condition less_cond = kLess;

  switch (compare->InputAt(0)->GetType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      codegen_->GenerateIntCompare(left, right);
      break;
    }
    case DataType::Type::kInt64: {
      Register left_low = left.AsRegisterPairLow<Register>();
      Register left_high = left.AsRegisterPairHigh<Register>();
      int32_t val_low = 0;
      int32_t val_high = 0;
      bool right_is_const = false;

      // Split a constant right operand into its two 32-bit halves up front.
      if (right.IsConstant()) {
        DCHECK(right.GetConstant()->IsLongConstant());
        right_is_const = true;
        int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
        val_low = Low32Bits(val);
        val_high = High32Bits(val);
      }

      // First compare the high halves; they decide the result unless they are equal.
      if (right.IsRegisterPair()) {
        __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_high, val_high);
      }
      __ j(kLess, &less);  // Signed compare.
      __ j(kGreater, &greater);  // Signed compare.
      // High halves are equal: compare the low halves; the common tail interprets
      // this comparison as unsigned via `less_cond`.
      if (right.IsRegisterPair()) {
        __ cmpl(left_low, right.AsRegisterPairLow<Register>());
      } else if (right.IsDoubleStackSlot()) {
        __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
      } else {
        DCHECK(right_is_const) << right;
        codegen_->Compare32BitValue(left_low, val_low);
      }
      less_cond = kBelow;  // for CF (unsigned).
      break;
    }
    case DataType::Type::kFloat32: {
      GenerateFPCompare(left, right, compare, false);
      // An unordered result goes to `greater` or `less` depending on the compare's bias.
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    case DataType::Type::kFloat64: {
      GenerateFPCompare(left, right, compare, true);
      // An unordered result goes to `greater` or `less` depending on the compare's bias.
      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
      less_cond = kBelow;  // for CF (floats).
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
  }

  // Common tail. `out` is zeroed with movl, which leaves the flags untouched, so the
  // conditional jumps below still test the comparison emitted above.
  __ movl(out, Immediate(0));
  __ j(kEqual, &done);
  __ j(less_cond, &less);

  __ Bind(&greater);
  __ movl(out, Immediate(1));
  __ jmp(&done);

  __ Bind(&less);
  __ movl(out, Immediate(-1));

  __ Bind(&done);
}
4442
VisitPhi(HPhi * instruction)4443 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4444 LocationSummary* locations =
4445 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4446 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4447 locations->SetInAt(i, Location::Any());
4448 }
4449 locations->SetOut(Location::Any());
4450 }
4451
// This visitor is not expected to run for phis: reaching it aborts with a fatal log.
void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}
4455
GenerateMemoryBarrier(MemBarrierKind kind)4456 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4457 /*
4458 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4459 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4460 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4461 */
4462 switch (kind) {
4463 case MemBarrierKind::kAnyAny: {
4464 MemoryFence();
4465 break;
4466 }
4467 case MemBarrierKind::kAnyStore:
4468 case MemBarrierKind::kLoadAny:
4469 case MemBarrierKind::kStoreStore: {
4470 // nop
4471 break;
4472 }
4473 case MemBarrierKind::kNTStoreStore:
4474 // Non-Temporal Store/Store needs an explicit fence.
4475 MemoryFence(/* non-temporal */ true);
4476 break;
4477 }
4478 }
4479
// Returns the requested dispatch info unchanged; `invoke` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}
4485
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)4486 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4487 Register temp) {
4488 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4489 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4490 if (!invoke->GetLocations()->Intrinsified()) {
4491 return location.AsRegister<Register>();
4492 }
4493 // For intrinsics we allow any location, so it may be on the stack.
4494 if (!location.IsRegister()) {
4495 __ movl(temp, Address(ESP, location.GetStackIndex()));
4496 return temp;
4497 }
4498 // For register locations, check if the register was saved. If so, get it from the stack.
4499 // Note: There is a chance that the register was saved but not overwritten, so we could
4500 // save one load. However, since this is just an intrinsic slow path we prefer this
4501 // simple and more robust approach rather that trying to determine if that's the case.
4502 SlowPathCode* slow_path = GetCurrentSlowPath();
4503 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
4504 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4505 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4506 __ movl(temp, Address(ESP, stack_offset));
4507 return temp;
4508 }
4509 return location.AsRegister<Register>();
4510 }
4511
// Emits a static or direct call for `invoke`.
//
// The first switch materializes the callee method according to the invoke's method load
// kind (in `temp` for every kind except kRecursive, which reuses the special input's
// location, and kRuntimeCall, which is delegated entirely and returns early). The second
// switch emits the call itself according to the code pointer location. The call's PC
// info is then recorded together with `slow_path`.
void CodeGeneratorX86::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      // The callee is taken from the invoke's special input.
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage());
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      // Address computation with a placeholder displacement; a boot image method patch
      // is recorded right after the instruction.
      __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
      RecordBootImageMethodPatch(invoke);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      // The method address is known and embedded as an immediate.
      __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      // Load with a placeholder displacement; a method .bss entry patch is recorded
      // right after the instruction.
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
      RecordMethodBssEntryPatch(invoke);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      // Call this method's own frame entry label.
      __ call(GetFrameEntryLabel());
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<Register>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86PointerSize).Int32Value()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}
4565
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)4566 void CodeGeneratorX86::GenerateVirtualCall(
4567 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4568 Register temp = temp_in.AsRegister<Register>();
4569 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4570 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
4571
4572 // Use the calling convention instead of the location of the receiver, as
4573 // intrinsics may have put the receiver in a different register. In the intrinsics
4574 // slow path, the arguments have been moved to the right place, so here we are
4575 // guaranteed that the receiver is the first register of the calling convention.
4576 InvokeDexCallingConvention calling_convention;
4577 Register receiver = calling_convention.GetRegisterAt(0);
4578 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4579 // /* HeapReference<Class> */ temp = receiver->klass_
4580 __ movl(temp, Address(receiver, class_offset));
4581 MaybeRecordImplicitNullCheck(invoke);
4582 // Instead of simply (possibly) unpoisoning `temp` here, we should
4583 // emit a read barrier for the previous class reference load.
4584 // However this is not required in practice, as this is an
4585 // intermediate/temporary reference and because the current
4586 // concurrent copying collector keeps the from-space memory
4587 // intact/accessible until the end of the marking phase (the
4588 // concurrent copying collector may not in the future).
4589 __ MaybeUnpoisonHeapReference(temp);
4590 // temp = temp->GetMethodAt(method_offset);
4591 __ movl(temp, Address(temp, method_offset));
4592 // call temp->GetEntryPoint();
4593 __ call(Address(
4594 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
4595 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4596 }
4597
RecordBootImageMethodPatch(HInvokeStaticOrDirect * invoke)4598 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
4599 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4600 HX86ComputeBaseMethodAddress* method_address =
4601 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
4602 boot_image_method_patches_.emplace_back(
4603 method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
4604 __ Bind(&boot_image_method_patches_.back().label);
4605 }
4606
RecordMethodBssEntryPatch(HInvokeStaticOrDirect * invoke)4607 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
4608 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4609 HX86ComputeBaseMethodAddress* method_address =
4610 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
4611 // Add the patch entry and bind its label at the end of the instruction.
4612 method_bss_entry_patches_.emplace_back(
4613 method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4614 __ Bind(&method_bss_entry_patches_.back().label);
4615 }
4616
RecordBootImageTypePatch(HLoadClass * load_class)4617 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
4618 HX86ComputeBaseMethodAddress* method_address =
4619 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4620 boot_image_type_patches_.emplace_back(
4621 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
4622 __ Bind(&boot_image_type_patches_.back().label);
4623 }
4624
NewTypeBssEntryPatch(HLoadClass * load_class)4625 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
4626 HX86ComputeBaseMethodAddress* method_address =
4627 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
4628 type_bss_entry_patches_.emplace_back(
4629 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
4630 return &type_bss_entry_patches_.back().label;
4631 }
4632
RecordBootImageStringPatch(HLoadString * load_string)4633 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
4634 HX86ComputeBaseMethodAddress* method_address =
4635 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4636 boot_image_string_patches_.emplace_back(
4637 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
4638 __ Bind(&boot_image_string_patches_.back().label);
4639 }
4640
NewStringBssEntryPatch(HLoadString * load_string)4641 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
4642 DCHECK(!GetCompilerOptions().IsBootImage());
4643 HX86ComputeBaseMethodAddress* method_address =
4644 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
4645 string_bss_entry_patches_.emplace_back(
4646 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
4647 return &string_bss_entry_patches_.back().label;
4648 }
4649
// A patch label points to the end of the patched instruction (a "movl" or another
// instruction), but the literal offset recorded in a linker patch needs to point to the
// embedded constant, which occupies the last 4 bytes of that instruction. Subtracting
// this adjustment from the label position yields the literal offset.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
4653
4654 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)4655 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
4656 const ArenaDeque<X86PcRelativePatchInfo>& infos,
4657 ArenaVector<linker::LinkerPatch>* linker_patches) {
4658 for (const X86PcRelativePatchInfo& info : infos) {
4659 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
4660 linker_patches->push_back(Factory(literal_offset,
4661 info.target_dex_file,
4662 GetMethodAddressOffset(info.method_address),
4663 info.offset_or_index));
4664 }
4665 }
4666
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)4667 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4668 DCHECK(linker_patches->empty());
4669 size_t size =
4670 boot_image_method_patches_.size() +
4671 method_bss_entry_patches_.size() +
4672 boot_image_type_patches_.size() +
4673 type_bss_entry_patches_.size() +
4674 boot_image_string_patches_.size() +
4675 string_bss_entry_patches_.size();
4676 linker_patches->reserve(size);
4677 if (GetCompilerOptions().IsBootImage()) {
4678 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4679 boot_image_method_patches_, linker_patches);
4680 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4681 boot_image_type_patches_, linker_patches);
4682 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4683 boot_image_string_patches_, linker_patches);
4684 } else {
4685 DCHECK(boot_image_method_patches_.empty());
4686 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
4687 boot_image_type_patches_, linker_patches);
4688 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
4689 boot_image_string_patches_, linker_patches);
4690 }
4691 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4692 method_bss_entry_patches_, linker_patches);
4693 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4694 type_bss_entry_patches_, linker_patches);
4695 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4696 string_bss_entry_patches_, linker_patches);
4697 DCHECK_EQ(size, linker_patches->size());
4698 }
4699
MarkGCCard(Register temp,Register card,Register object,Register value,bool value_can_be_null)4700 void CodeGeneratorX86::MarkGCCard(Register temp,
4701 Register card,
4702 Register object,
4703 Register value,
4704 bool value_can_be_null) {
4705 NearLabel is_null;
4706 if (value_can_be_null) {
4707 __ testl(value, value);
4708 __ j(kEqual, &is_null);
4709 }
4710 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
4711 __ movl(temp, object);
4712 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
4713 __ movb(Address(temp, card, TIMES_1, 0),
4714 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
4715 if (value_can_be_null) {
4716 __ Bind(&is_null);
4717 }
4718 }
4719
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)4720 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
4721 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4722
4723 bool object_field_get_with_read_barrier =
4724 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4725 LocationSummary* locations =
4726 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4727 kEmitCompilerReadBarrier
4728 ? LocationSummary::kCallOnSlowPath
4729 : LocationSummary::kNoCall);
4730 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4731 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4732 }
4733 locations->SetInAt(0, Location::RequiresRegister());
4734
4735 if (DataType::IsFloatingPointType(instruction->GetType())) {
4736 locations->SetOut(Location::RequiresFpuRegister());
4737 } else {
4738 // The output overlaps in case of long: we don't want the low move
4739 // to overwrite the object's location. Likewise, in the case of
4740 // an object field get with read barriers enabled, we do not want
4741 // the move to overwrite the object's location, as we need it to emit
4742 // the read barrier.
4743 locations->SetOut(
4744 Location::RequiresRegister(),
4745 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
4746 Location::kOutputOverlap :
4747 Location::kNoOutputOverlap);
4748 }
4749
4750 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
4751 // Long values can be loaded atomically into an XMM using movsd.
4752 // So we use an XMM register as a temp to achieve atomicity (first
4753 // load the temp into the XMM and then copy the XMM into the
4754 // output, 32 bits at a time).
4755 locations->AddTemp(Location::RequiresFpuRegister());
4756 }
4757 }
4758
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)4759 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
4760 const FieldInfo& field_info) {
4761 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4762
4763 LocationSummary* locations = instruction->GetLocations();
4764 Location base_loc = locations->InAt(0);
4765 Register base = base_loc.AsRegister<Register>();
4766 Location out = locations->Out();
4767 bool is_volatile = field_info.IsVolatile();
4768 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4769 DataType::Type load_type = instruction->GetType();
4770 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4771
4772 switch (load_type) {
4773 case DataType::Type::kBool:
4774 case DataType::Type::kUint8: {
4775 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
4776 break;
4777 }
4778
4779 case DataType::Type::kInt8: {
4780 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
4781 break;
4782 }
4783
4784 case DataType::Type::kUint16: {
4785 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
4786 break;
4787 }
4788
4789 case DataType::Type::kInt16: {
4790 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
4791 break;
4792 }
4793
4794 case DataType::Type::kInt32:
4795 __ movl(out.AsRegister<Register>(), Address(base, offset));
4796 break;
4797
4798 case DataType::Type::kReference: {
4799 // /* HeapReference<Object> */ out = *(base + offset)
4800 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4801 // Note that a potential implicit null check is handled in this
4802 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
4803 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4804 instruction, out, base, offset, /* needs_null_check */ true);
4805 if (is_volatile) {
4806 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4807 }
4808 } else {
4809 __ movl(out.AsRegister<Register>(), Address(base, offset));
4810 codegen_->MaybeRecordImplicitNullCheck(instruction);
4811 if (is_volatile) {
4812 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4813 }
4814 // If read barriers are enabled, emit read barriers other than
4815 // Baker's using a slow path (and also unpoison the loaded
4816 // reference, if heap poisoning is enabled).
4817 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4818 }
4819 break;
4820 }
4821
4822 case DataType::Type::kInt64: {
4823 if (is_volatile) {
4824 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4825 __ movsd(temp, Address(base, offset));
4826 codegen_->MaybeRecordImplicitNullCheck(instruction);
4827 __ movd(out.AsRegisterPairLow<Register>(), temp);
4828 __ psrlq(temp, Immediate(32));
4829 __ movd(out.AsRegisterPairHigh<Register>(), temp);
4830 } else {
4831 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
4832 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
4833 codegen_->MaybeRecordImplicitNullCheck(instruction);
4834 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
4835 }
4836 break;
4837 }
4838
4839 case DataType::Type::kFloat32: {
4840 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4841 break;
4842 }
4843
4844 case DataType::Type::kFloat64: {
4845 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4846 break;
4847 }
4848
4849 case DataType::Type::kUint32:
4850 case DataType::Type::kUint64:
4851 case DataType::Type::kVoid:
4852 LOG(FATAL) << "Unreachable type " << load_type;
4853 UNREACHABLE();
4854 }
4855
4856 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
4857 // Potential implicit null checks, in the case of reference or
4858 // long fields, are handled in the previous switch statement.
4859 } else {
4860 codegen_->MaybeRecordImplicitNullCheck(instruction);
4861 }
4862
4863 if (is_volatile) {
4864 if (load_type == DataType::Type::kReference) {
4865 // Memory barriers, in the case of references, are also handled
4866 // in the previous switch statement.
4867 } else {
4868 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4869 }
4870 }
4871 }
4872
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)4873 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
4874 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4875
4876 LocationSummary* locations =
4877 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4878 locations->SetInAt(0, Location::RequiresRegister());
4879 bool is_volatile = field_info.IsVolatile();
4880 DataType::Type field_type = field_info.GetFieldType();
4881 bool is_byte_type = DataType::Size(field_type) == 1u;
4882
4883 // The register allocator does not support multiple
4884 // inputs that die at entry with one in a specific register.
4885 if (is_byte_type) {
4886 // Ensure the value is in a byte register.
4887 locations->SetInAt(1, Location::RegisterLocation(EAX));
4888 } else if (DataType::IsFloatingPointType(field_type)) {
4889 if (is_volatile && field_type == DataType::Type::kFloat64) {
4890 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4891 locations->SetInAt(1, Location::RequiresFpuRegister());
4892 } else {
4893 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4894 }
4895 } else if (is_volatile && field_type == DataType::Type::kInt64) {
4896 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4897 locations->SetInAt(1, Location::RequiresRegister());
4898
4899 // 64bits value can be atomically written to an address with movsd and an XMM register.
4900 // We need two XMM registers because there's no easier way to (bit) copy a register pair
4901 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
4902 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
4903 // isolated cases when we need this it isn't worth adding the extra complexity.
4904 locations->AddTemp(Location::RequiresFpuRegister());
4905 locations->AddTemp(Location::RequiresFpuRegister());
4906 } else {
4907 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4908
4909 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4910 // Temporary registers for the write barrier.
4911 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
4912 // Ensure the card is in a byte register.
4913 locations->AddTemp(Location::RegisterLocation(ECX));
4914 }
4915 }
4916 }
4917
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)4918 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
4919 const FieldInfo& field_info,
4920 bool value_can_be_null) {
4921 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4922
4923 LocationSummary* locations = instruction->GetLocations();
4924 Register base = locations->InAt(0).AsRegister<Register>();
4925 Location value = locations->InAt(1);
4926 bool is_volatile = field_info.IsVolatile();
4927 DataType::Type field_type = field_info.GetFieldType();
4928 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4929 bool needs_write_barrier =
4930 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4931
4932 if (is_volatile) {
4933 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4934 }
4935
4936 bool maybe_record_implicit_null_check_done = false;
4937
4938 switch (field_type) {
4939 case DataType::Type::kBool:
4940 case DataType::Type::kUint8:
4941 case DataType::Type::kInt8: {
4942 __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
4943 break;
4944 }
4945
4946 case DataType::Type::kUint16:
4947 case DataType::Type::kInt16: {
4948 if (value.IsConstant()) {
4949 __ movw(Address(base, offset),
4950 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4951 } else {
4952 __ movw(Address(base, offset), value.AsRegister<Register>());
4953 }
4954 break;
4955 }
4956
4957 case DataType::Type::kInt32:
4958 case DataType::Type::kReference: {
4959 if (kPoisonHeapReferences && needs_write_barrier) {
4960 // Note that in the case where `value` is a null reference,
4961 // we do not enter this block, as the reference does not
4962 // need poisoning.
4963 DCHECK_EQ(field_type, DataType::Type::kReference);
4964 Register temp = locations->GetTemp(0).AsRegister<Register>();
4965 __ movl(temp, value.AsRegister<Register>());
4966 __ PoisonHeapReference(temp);
4967 __ movl(Address(base, offset), temp);
4968 } else if (value.IsConstant()) {
4969 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4970 __ movl(Address(base, offset), Immediate(v));
4971 } else {
4972 DCHECK(value.IsRegister()) << value;
4973 __ movl(Address(base, offset), value.AsRegister<Register>());
4974 }
4975 break;
4976 }
4977
4978 case DataType::Type::kInt64: {
4979 if (is_volatile) {
4980 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4981 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
4982 __ movd(temp1, value.AsRegisterPairLow<Register>());
4983 __ movd(temp2, value.AsRegisterPairHigh<Register>());
4984 __ punpckldq(temp1, temp2);
4985 __ movsd(Address(base, offset), temp1);
4986 codegen_->MaybeRecordImplicitNullCheck(instruction);
4987 } else if (value.IsConstant()) {
4988 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
4989 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
4990 codegen_->MaybeRecordImplicitNullCheck(instruction);
4991 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
4992 } else {
4993 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
4994 codegen_->MaybeRecordImplicitNullCheck(instruction);
4995 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
4996 }
4997 maybe_record_implicit_null_check_done = true;
4998 break;
4999 }
5000
5001 case DataType::Type::kFloat32: {
5002 if (value.IsConstant()) {
5003 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5004 __ movl(Address(base, offset), Immediate(v));
5005 } else {
5006 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5007 }
5008 break;
5009 }
5010
5011 case DataType::Type::kFloat64: {
5012 if (value.IsConstant()) {
5013 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5014 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5015 codegen_->MaybeRecordImplicitNullCheck(instruction);
5016 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5017 maybe_record_implicit_null_check_done = true;
5018 } else {
5019 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5020 }
5021 break;
5022 }
5023
5024 case DataType::Type::kUint32:
5025 case DataType::Type::kUint64:
5026 case DataType::Type::kVoid:
5027 LOG(FATAL) << "Unreachable type " << field_type;
5028 UNREACHABLE();
5029 }
5030
5031 if (!maybe_record_implicit_null_check_done) {
5032 codegen_->MaybeRecordImplicitNullCheck(instruction);
5033 }
5034
5035 if (needs_write_barrier) {
5036 Register temp = locations->GetTemp(0).AsRegister<Register>();
5037 Register card = locations->GetTemp(1).AsRegister<Register>();
5038 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5039 }
5040
5041 if (is_volatile) {
5042 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5043 }
5044 }
5045
VisitStaticFieldGet(HStaticFieldGet * instruction)5046 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5047 HandleFieldGet(instruction, instruction->GetFieldInfo());
5048 }
5049
VisitStaticFieldGet(HStaticFieldGet * instruction)5050 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5051 HandleFieldGet(instruction, instruction->GetFieldInfo());
5052 }
5053
VisitStaticFieldSet(HStaticFieldSet * instruction)5054 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5055 HandleFieldSet(instruction, instruction->GetFieldInfo());
5056 }
5057
VisitStaticFieldSet(HStaticFieldSet * instruction)5058 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5059 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5060 }
5061
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5062 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5063 HandleFieldSet(instruction, instruction->GetFieldInfo());
5064 }
5065
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5066 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5067 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5068 }
5069
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5070 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5071 HandleFieldGet(instruction, instruction->GetFieldInfo());
5072 }
5073
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5074 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5075 HandleFieldGet(instruction, instruction->GetFieldInfo());
5076 }
5077
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5078 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5079 HUnresolvedInstanceFieldGet* instruction) {
5080 FieldAccessCallingConventionX86 calling_convention;
5081 codegen_->CreateUnresolvedFieldLocationSummary(
5082 instruction, instruction->GetFieldType(), calling_convention);
5083 }
5084
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5085 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5086 HUnresolvedInstanceFieldGet* instruction) {
5087 FieldAccessCallingConventionX86 calling_convention;
5088 codegen_->GenerateUnresolvedFieldAccess(instruction,
5089 instruction->GetFieldType(),
5090 instruction->GetFieldIndex(),
5091 instruction->GetDexPc(),
5092 calling_convention);
5093 }
5094
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5095 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5096 HUnresolvedInstanceFieldSet* instruction) {
5097 FieldAccessCallingConventionX86 calling_convention;
5098 codegen_->CreateUnresolvedFieldLocationSummary(
5099 instruction, instruction->GetFieldType(), calling_convention);
5100 }
5101
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5102 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5103 HUnresolvedInstanceFieldSet* instruction) {
5104 FieldAccessCallingConventionX86 calling_convention;
5105 codegen_->GenerateUnresolvedFieldAccess(instruction,
5106 instruction->GetFieldType(),
5107 instruction->GetFieldIndex(),
5108 instruction->GetDexPc(),
5109 calling_convention);
5110 }
5111
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5112 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5113 HUnresolvedStaticFieldGet* instruction) {
5114 FieldAccessCallingConventionX86 calling_convention;
5115 codegen_->CreateUnresolvedFieldLocationSummary(
5116 instruction, instruction->GetFieldType(), calling_convention);
5117 }
5118
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5119 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5120 HUnresolvedStaticFieldGet* instruction) {
5121 FieldAccessCallingConventionX86 calling_convention;
5122 codegen_->GenerateUnresolvedFieldAccess(instruction,
5123 instruction->GetFieldType(),
5124 instruction->GetFieldIndex(),
5125 instruction->GetDexPc(),
5126 calling_convention);
5127 }
5128
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5129 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5130 HUnresolvedStaticFieldSet* instruction) {
5131 FieldAccessCallingConventionX86 calling_convention;
5132 codegen_->CreateUnresolvedFieldLocationSummary(
5133 instruction, instruction->GetFieldType(), calling_convention);
5134 }
5135
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5136 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5137 HUnresolvedStaticFieldSet* instruction) {
5138 FieldAccessCallingConventionX86 calling_convention;
5139 codegen_->GenerateUnresolvedFieldAccess(instruction,
5140 instruction->GetFieldType(),
5141 instruction->GetFieldIndex(),
5142 instruction->GetDexPc(),
5143 calling_convention);
5144 }
5145
VisitNullCheck(HNullCheck * instruction)5146 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5147 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5148 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5149 ? Location::RequiresRegister()
5150 : Location::Any();
5151 locations->SetInAt(0, loc);
5152 }
5153
GenerateImplicitNullCheck(HNullCheck * instruction)5154 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5155 if (CanMoveNullCheckToUser(instruction)) {
5156 return;
5157 }
5158 LocationSummary* locations = instruction->GetLocations();
5159 Location obj = locations->InAt(0);
5160
5161 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5162 RecordPcInfo(instruction, instruction->GetDexPc());
5163 }
5164
GenerateExplicitNullCheck(HNullCheck * instruction)5165 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5166 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5167 AddSlowPath(slow_path);
5168
5169 LocationSummary* locations = instruction->GetLocations();
5170 Location obj = locations->InAt(0);
5171
5172 if (obj.IsRegister()) {
5173 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5174 } else if (obj.IsStackSlot()) {
5175 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5176 } else {
5177 DCHECK(obj.IsConstant()) << obj;
5178 DCHECK(obj.GetConstant()->IsNullConstant());
5179 __ jmp(slow_path->GetEntryLabel());
5180 return;
5181 }
5182 __ j(kEqual, slow_path->GetEntryLabel());
5183 }
5184
// Code emission for a null check: delegates entirely to the code generator's
// GenerateNullCheck.
void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
  codegen_->GenerateNullCheck(instruction);
}
5188
VisitArrayGet(HArrayGet * instruction)5189 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5190 bool object_array_get_with_read_barrier =
5191 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5192 LocationSummary* locations =
5193 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5194 object_array_get_with_read_barrier
5195 ? LocationSummary::kCallOnSlowPath
5196 : LocationSummary::kNoCall);
5197 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5198 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5199 }
5200 locations->SetInAt(0, Location::RequiresRegister());
5201 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5202 if (DataType::IsFloatingPointType(instruction->GetType())) {
5203 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5204 } else {
5205 // The output overlaps in case of long: we don't want the low move
5206 // to overwrite the array's location. Likewise, in the case of an
5207 // object array get with read barriers enabled, we do not want the
5208 // move to overwrite the array's location, as we need it to emit
5209 // the read barrier.
5210 locations->SetOut(
5211 Location::RequiresRegister(),
5212 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5213 ? Location::kOutputOverlap
5214 : Location::kNoOutputOverlap);
5215 }
5216 }
5217
VisitArrayGet(HArrayGet * instruction)5218 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5219 LocationSummary* locations = instruction->GetLocations();
5220 Location obj_loc = locations->InAt(0);
5221 Register obj = obj_loc.AsRegister<Register>();
5222 Location index = locations->InAt(1);
5223 Location out_loc = locations->Out();
5224 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5225
5226 DataType::Type type = instruction->GetType();
5227 switch (type) {
5228 case DataType::Type::kBool:
5229 case DataType::Type::kUint8: {
5230 Register out = out_loc.AsRegister<Register>();
5231 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5232 break;
5233 }
5234
5235 case DataType::Type::kInt8: {
5236 Register out = out_loc.AsRegister<Register>();
5237 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5238 break;
5239 }
5240
5241 case DataType::Type::kUint16: {
5242 Register out = out_loc.AsRegister<Register>();
5243 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5244 // Branch cases into compressed and uncompressed for each index's type.
5245 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5246 NearLabel done, not_compressed;
5247 __ testb(Address(obj, count_offset), Immediate(1));
5248 codegen_->MaybeRecordImplicitNullCheck(instruction);
5249 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5250 "Expecting 0=compressed, 1=uncompressed");
5251 __ j(kNotZero, ¬_compressed);
5252 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5253 __ jmp(&done);
5254 __ Bind(¬_compressed);
5255 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5256 __ Bind(&done);
5257 } else {
5258 // Common case for charAt of array of char or when string compression's
5259 // feature is turned off.
5260 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5261 }
5262 break;
5263 }
5264
5265 case DataType::Type::kInt16: {
5266 Register out = out_loc.AsRegister<Register>();
5267 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5268 break;
5269 }
5270
5271 case DataType::Type::kInt32: {
5272 Register out = out_loc.AsRegister<Register>();
5273 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5274 break;
5275 }
5276
5277 case DataType::Type::kReference: {
5278 static_assert(
5279 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5280 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5281 // /* HeapReference<Object> */ out =
5282 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5283 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5284 // Note that a potential implicit null check is handled in this
5285 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5286 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5287 instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
5288 } else {
5289 Register out = out_loc.AsRegister<Register>();
5290 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5291 codegen_->MaybeRecordImplicitNullCheck(instruction);
5292 // If read barriers are enabled, emit read barriers other than
5293 // Baker's using a slow path (and also unpoison the loaded
5294 // reference, if heap poisoning is enabled).
5295 if (index.IsConstant()) {
5296 uint32_t offset =
5297 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5298 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5299 } else {
5300 codegen_->MaybeGenerateReadBarrierSlow(
5301 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5302 }
5303 }
5304 break;
5305 }
5306
5307 case DataType::Type::kInt64: {
5308 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5309 __ movl(out_loc.AsRegisterPairLow<Register>(),
5310 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5311 codegen_->MaybeRecordImplicitNullCheck(instruction);
5312 __ movl(out_loc.AsRegisterPairHigh<Register>(),
5313 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5314 break;
5315 }
5316
5317 case DataType::Type::kFloat32: {
5318 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5319 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5320 break;
5321 }
5322
5323 case DataType::Type::kFloat64: {
5324 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5325 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5326 break;
5327 }
5328
5329 case DataType::Type::kUint32:
5330 case DataType::Type::kUint64:
5331 case DataType::Type::kVoid:
5332 LOG(FATAL) << "Unreachable type " << type;
5333 UNREACHABLE();
5334 }
5335
5336 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
5337 // Potential implicit null checks, in the case of reference or
5338 // long arrays, are handled in the previous switch statement.
5339 } else {
5340 codegen_->MaybeRecordImplicitNullCheck(instruction);
5341 }
5342 }
5343
VisitArraySet(HArraySet * instruction)5344 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
5345 DataType::Type value_type = instruction->GetComponentType();
5346
5347 bool needs_write_barrier =
5348 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5349 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5350
5351 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5352 instruction,
5353 may_need_runtime_call_for_type_check ?
5354 LocationSummary::kCallOnSlowPath :
5355 LocationSummary::kNoCall);
5356
5357 bool is_byte_type = DataType::Size(value_type) == 1u;
5358 // We need the inputs to be different than the output in case of long operation.
5359 // In case of a byte operation, the register allocator does not support multiple
5360 // inputs that die at entry with one in a specific register.
5361 locations->SetInAt(0, Location::RequiresRegister());
5362 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5363 if (is_byte_type) {
5364 // Ensure the value is in a byte register.
5365 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
5366 } else if (DataType::IsFloatingPointType(value_type)) {
5367 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5368 } else {
5369 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5370 }
5371 if (needs_write_barrier) {
5372 // Temporary registers for the write barrier.
5373 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5374 // Ensure the card is in a byte register.
5375 locations->AddTemp(Location::RegisterLocation(ECX));
5376 }
5377 }
5378
VisitArraySet(HArraySet * instruction)5379 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
5380 LocationSummary* locations = instruction->GetLocations();
5381 Location array_loc = locations->InAt(0);
5382 Register array = array_loc.AsRegister<Register>();
5383 Location index = locations->InAt(1);
5384 Location value = locations->InAt(2);
5385 DataType::Type value_type = instruction->GetComponentType();
5386 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5387 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5388 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5389 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5390 bool needs_write_barrier =
5391 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5392
5393 switch (value_type) {
5394 case DataType::Type::kBool:
5395 case DataType::Type::kUint8:
5396 case DataType::Type::kInt8: {
5397 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5398 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
5399 if (value.IsRegister()) {
5400 __ movb(address, value.AsRegister<ByteRegister>());
5401 } else {
5402 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5403 }
5404 codegen_->MaybeRecordImplicitNullCheck(instruction);
5405 break;
5406 }
5407
5408 case DataType::Type::kUint16:
5409 case DataType::Type::kInt16: {
5410 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5411 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
5412 if (value.IsRegister()) {
5413 __ movw(address, value.AsRegister<Register>());
5414 } else {
5415 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5416 }
5417 codegen_->MaybeRecordImplicitNullCheck(instruction);
5418 break;
5419 }
5420
5421 case DataType::Type::kReference: {
5422 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5423 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5424
5425 if (!value.IsRegister()) {
5426 // Just setting null.
5427 DCHECK(instruction->InputAt(2)->IsNullConstant());
5428 DCHECK(value.IsConstant()) << value;
5429 __ movl(address, Immediate(0));
5430 codegen_->MaybeRecordImplicitNullCheck(instruction);
5431 DCHECK(!needs_write_barrier);
5432 DCHECK(!may_need_runtime_call_for_type_check);
5433 break;
5434 }
5435
5436 DCHECK(needs_write_barrier);
5437 Register register_value = value.AsRegister<Register>();
5438 // We cannot use a NearLabel for `done`, as its range may be too
5439 // short when Baker read barriers are enabled.
5440 Label done;
5441 NearLabel not_null, do_put;
5442 SlowPathCode* slow_path = nullptr;
5443 Location temp_loc = locations->GetTemp(0);
5444 Register temp = temp_loc.AsRegister<Register>();
5445 if (may_need_runtime_call_for_type_check) {
5446 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
5447 codegen_->AddSlowPath(slow_path);
5448 if (instruction->GetValueCanBeNull()) {
5449 __ testl(register_value, register_value);
5450 __ j(kNotEqual, ¬_null);
5451 __ movl(address, Immediate(0));
5452 codegen_->MaybeRecordImplicitNullCheck(instruction);
5453 __ jmp(&done);
5454 __ Bind(¬_null);
5455 }
5456
5457 // Note that when Baker read barriers are enabled, the type
5458 // checks are performed without read barriers. This is fine,
5459 // even in the case where a class object is in the from-space
5460 // after the flip, as a comparison involving such a type would
5461 // not produce a false positive; it may of course produce a
5462 // false negative, in which case we would take the ArraySet
5463 // slow path.
5464
5465 // /* HeapReference<Class> */ temp = array->klass_
5466 __ movl(temp, Address(array, class_offset));
5467 codegen_->MaybeRecordImplicitNullCheck(instruction);
5468 __ MaybeUnpoisonHeapReference(temp);
5469
5470 // /* HeapReference<Class> */ temp = temp->component_type_
5471 __ movl(temp, Address(temp, component_offset));
5472 // If heap poisoning is enabled, no need to unpoison `temp`
5473 // nor the object reference in `register_value->klass`, as
5474 // we are comparing two poisoned references.
5475 __ cmpl(temp, Address(register_value, class_offset));
5476
5477 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5478 __ j(kEqual, &do_put);
5479 // If heap poisoning is enabled, the `temp` reference has
5480 // not been unpoisoned yet; unpoison it now.
5481 __ MaybeUnpoisonHeapReference(temp);
5482
5483 // If heap poisoning is enabled, no need to unpoison the
5484 // heap reference loaded below, as it is only used for a
5485 // comparison with null.
5486 __ cmpl(Address(temp, super_offset), Immediate(0));
5487 __ j(kNotEqual, slow_path->GetEntryLabel());
5488 __ Bind(&do_put);
5489 } else {
5490 __ j(kNotEqual, slow_path->GetEntryLabel());
5491 }
5492 }
5493
5494 if (kPoisonHeapReferences) {
5495 __ movl(temp, register_value);
5496 __ PoisonHeapReference(temp);
5497 __ movl(address, temp);
5498 } else {
5499 __ movl(address, register_value);
5500 }
5501 if (!may_need_runtime_call_for_type_check) {
5502 codegen_->MaybeRecordImplicitNullCheck(instruction);
5503 }
5504
5505 Register card = locations->GetTemp(1).AsRegister<Register>();
5506 codegen_->MarkGCCard(
5507 temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
5508 __ Bind(&done);
5509
5510 if (slow_path != nullptr) {
5511 __ Bind(slow_path->GetExitLabel());
5512 }
5513
5514 break;
5515 }
5516
5517 case DataType::Type::kInt32: {
5518 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5519 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5520 if (value.IsRegister()) {
5521 __ movl(address, value.AsRegister<Register>());
5522 } else {
5523 DCHECK(value.IsConstant()) << value;
5524 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5525 __ movl(address, Immediate(v));
5526 }
5527 codegen_->MaybeRecordImplicitNullCheck(instruction);
5528 break;
5529 }
5530
5531 case DataType::Type::kInt64: {
5532 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5533 if (value.IsRegisterPair()) {
5534 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5535 value.AsRegisterPairLow<Register>());
5536 codegen_->MaybeRecordImplicitNullCheck(instruction);
5537 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5538 value.AsRegisterPairHigh<Register>());
5539 } else {
5540 DCHECK(value.IsConstant());
5541 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
5542 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
5543 Immediate(Low32Bits(val)));
5544 codegen_->MaybeRecordImplicitNullCheck(instruction);
5545 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
5546 Immediate(High32Bits(val)));
5547 }
5548 break;
5549 }
5550
5551 case DataType::Type::kFloat32: {
5552 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5553 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5554 if (value.IsFpuRegister()) {
5555 __ movss(address, value.AsFpuRegister<XmmRegister>());
5556 } else {
5557 DCHECK(value.IsConstant());
5558 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5559 __ movl(address, Immediate(v));
5560 }
5561 codegen_->MaybeRecordImplicitNullCheck(instruction);
5562 break;
5563 }
5564
5565 case DataType::Type::kFloat64: {
5566 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5567 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
5568 if (value.IsFpuRegister()) {
5569 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5570 } else {
5571 DCHECK(value.IsConstant());
5572 Address address_hi =
5573 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
5574 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5575 __ movl(address, Immediate(Low32Bits(v)));
5576 codegen_->MaybeRecordImplicitNullCheck(instruction);
5577 __ movl(address_hi, Immediate(High32Bits(v)));
5578 }
5579 break;
5580 }
5581
5582 case DataType::Type::kUint32:
5583 case DataType::Type::kUint64:
5584 case DataType::Type::kVoid:
5585 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5586 UNREACHABLE();
5587 }
5588 }
5589
VisitArrayLength(HArrayLength * instruction)5590 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
5591 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5592 locations->SetInAt(0, Location::RequiresRegister());
5593 if (!instruction->IsEmittedAtUseSite()) {
5594 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5595 }
5596 }
5597
VisitArrayLength(HArrayLength * instruction)5598 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
5599 if (instruction->IsEmittedAtUseSite()) {
5600 return;
5601 }
5602
5603 LocationSummary* locations = instruction->GetLocations();
5604 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5605 Register obj = locations->InAt(0).AsRegister<Register>();
5606 Register out = locations->Out().AsRegister<Register>();
5607 __ movl(out, Address(obj, offset));
5608 codegen_->MaybeRecordImplicitNullCheck(instruction);
5609 // Mask out most significant bit in case the array is String's array of char.
5610 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5611 __ shrl(out, Immediate(1));
5612 }
5613 }
5614
VisitBoundsCheck(HBoundsCheck * instruction)5615 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5616 RegisterSet caller_saves = RegisterSet::Empty();
5617 InvokeRuntimeCallingConvention calling_convention;
5618 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5619 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5620 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5621 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5622 HInstruction* length = instruction->InputAt(1);
5623 if (!length->IsEmittedAtUseSite()) {
5624 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5625 }
5626 // Need register to see array's length.
5627 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5628 locations->AddTemp(Location::RequiresRegister());
5629 }
5630 }
5631
VisitBoundsCheck(HBoundsCheck * instruction)5632 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
5633 const bool is_string_compressed_char_at =
5634 mirror::kUseStringCompression && instruction->IsStringCharAt();
5635 LocationSummary* locations = instruction->GetLocations();
5636 Location index_loc = locations->InAt(0);
5637 Location length_loc = locations->InAt(1);
5638 SlowPathCode* slow_path =
5639 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
5640
5641 if (length_loc.IsConstant()) {
5642 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5643 if (index_loc.IsConstant()) {
5644 // BCE will remove the bounds check if we are guarenteed to pass.
5645 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5646 if (index < 0 || index >= length) {
5647 codegen_->AddSlowPath(slow_path);
5648 __ jmp(slow_path->GetEntryLabel());
5649 } else {
5650 // Some optimization after BCE may have generated this, and we should not
5651 // generate a bounds check if it is a valid range.
5652 }
5653 return;
5654 }
5655
5656 // We have to reverse the jump condition because the length is the constant.
5657 Register index_reg = index_loc.AsRegister<Register>();
5658 __ cmpl(index_reg, Immediate(length));
5659 codegen_->AddSlowPath(slow_path);
5660 __ j(kAboveEqual, slow_path->GetEntryLabel());
5661 } else {
5662 HInstruction* array_length = instruction->InputAt(1);
5663 if (array_length->IsEmittedAtUseSite()) {
5664 // Address the length field in the array.
5665 DCHECK(array_length->IsArrayLength());
5666 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5667 Location array_loc = array_length->GetLocations()->InAt(0);
5668 Address array_len(array_loc.AsRegister<Register>(), len_offset);
5669 if (is_string_compressed_char_at) {
5670 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5671 // the string compression flag) with the in-memory length and avoid the temporary.
5672 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
5673 __ movl(length_reg, array_len);
5674 codegen_->MaybeRecordImplicitNullCheck(array_length);
5675 __ shrl(length_reg, Immediate(1));
5676 codegen_->GenerateIntCompare(length_reg, index_loc);
5677 } else {
5678 // Checking bounds for general case:
5679 // Array of char or string's array with feature compression off.
5680 if (index_loc.IsConstant()) {
5681 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5682 __ cmpl(array_len, Immediate(value));
5683 } else {
5684 __ cmpl(array_len, index_loc.AsRegister<Register>());
5685 }
5686 codegen_->MaybeRecordImplicitNullCheck(array_length);
5687 }
5688 } else {
5689 codegen_->GenerateIntCompare(length_loc, index_loc);
5690 }
5691 codegen_->AddSlowPath(slow_path);
5692 __ j(kBelowEqual, slow_path->GetEntryLabel());
5693 }
5694 }
5695
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)5696 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5697 LOG(FATAL) << "Unreachable";
5698 }
5699
VisitParallelMove(HParallelMove * instruction)5700 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
5701 if (instruction->GetNext()->IsSuspendCheck() &&
5702 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5703 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5704 // The back edge will generate the suspend check.
5705 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5706 }
5707
5708 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5709 }
5710
VisitSuspendCheck(HSuspendCheck * instruction)5711 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5712 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5713 instruction, LocationSummary::kCallOnSlowPath);
5714 // In suspend check slow path, usually there are no caller-save registers at all.
5715 // If SIMD instructions are present, however, we force spilling all live SIMD
5716 // registers in full width (since the runtime only saves/restores lower part).
5717 locations->SetCustomSlowPathCallerSaves(
5718 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5719 }
5720
VisitSuspendCheck(HSuspendCheck * instruction)5721 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
5722 HBasicBlock* block = instruction->GetBlock();
5723 if (block->GetLoopInformation() != nullptr) {
5724 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5725 // The back edge will generate the suspend check.
5726 return;
5727 }
5728 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5729 // The goto will generate the suspend check.
5730 return;
5731 }
5732 GenerateSuspendCheck(instruction, nullptr);
5733 }
5734
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)5735 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
5736 HBasicBlock* successor) {
5737 SuspendCheckSlowPathX86* slow_path =
5738 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
5739 if (slow_path == nullptr) {
5740 slow_path =
5741 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
5742 instruction->SetSlowPath(slow_path);
5743 codegen_->AddSlowPath(slow_path);
5744 if (successor != nullptr) {
5745 DCHECK(successor->IsLoopHeader());
5746 }
5747 } else {
5748 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5749 }
5750
5751 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
5752 Immediate(0));
5753 if (successor == nullptr) {
5754 __ j(kNotEqual, slow_path->GetEntryLabel());
5755 __ Bind(slow_path->GetReturnLabel());
5756 } else {
5757 __ j(kEqual, codegen_->GetLabelOf(successor));
5758 __ jmp(slow_path->GetEntryLabel());
5759 }
5760 }
5761
GetAssembler() const5762 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
5763 return codegen_->GetAssembler();
5764 }
5765
MoveMemoryToMemory(int dst,int src,int number_of_words)5766 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
5767 ScratchRegisterScope ensure_scratch(
5768 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5769 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5770 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5771
5772 // Now that temp register is available (possibly spilled), move blocks of memory.
5773 for (int i = 0; i < number_of_words; i++) {
5774 __ movl(temp_reg, Address(ESP, src + stack_offset));
5775 __ movl(Address(ESP, dst + stack_offset), temp_reg);
5776 stack_offset += kX86WordSize;
5777 }
5778 }
5779
EmitMove(size_t index)5780 void ParallelMoveResolverX86::EmitMove(size_t index) {
5781 MoveOperands* move = moves_[index];
5782 Location source = move->GetSource();
5783 Location destination = move->GetDestination();
5784
5785 if (source.IsRegister()) {
5786 if (destination.IsRegister()) {
5787 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5788 } else if (destination.IsFpuRegister()) {
5789 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
5790 } else {
5791 DCHECK(destination.IsStackSlot());
5792 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
5793 }
5794 } else if (source.IsRegisterPair()) {
5795 size_t elem_size = DataType::Size(DataType::Type::kInt32);
5796 // Create stack space for 2 elements.
5797 __ subl(ESP, Immediate(2 * elem_size));
5798 __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
5799 __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
5800 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
5801 // And remove the temporary stack space we allocated.
5802 __ addl(ESP, Immediate(2 * elem_size));
5803 } else if (source.IsFpuRegister()) {
5804 if (destination.IsRegister()) {
5805 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
5806 } else if (destination.IsFpuRegister()) {
5807 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5808 } else if (destination.IsRegisterPair()) {
5809 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
5810 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
5811 __ psrlq(src_reg, Immediate(32));
5812 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
5813 } else if (destination.IsStackSlot()) {
5814 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5815 } else if (destination.IsDoubleStackSlot()) {
5816 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5817 } else {
5818 DCHECK(destination.IsSIMDStackSlot());
5819 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
5820 }
5821 } else if (source.IsStackSlot()) {
5822 if (destination.IsRegister()) {
5823 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
5824 } else if (destination.IsFpuRegister()) {
5825 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5826 } else {
5827 DCHECK(destination.IsStackSlot());
5828 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
5829 }
5830 } else if (source.IsDoubleStackSlot()) {
5831 if (destination.IsRegisterPair()) {
5832 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
5833 __ movl(destination.AsRegisterPairHigh<Register>(),
5834 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
5835 } else if (destination.IsFpuRegister()) {
5836 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5837 } else {
5838 DCHECK(destination.IsDoubleStackSlot()) << destination;
5839 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
5840 }
5841 } else if (source.IsSIMDStackSlot()) {
5842 if (destination.IsFpuRegister()) {
5843 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
5844 } else {
5845 DCHECK(destination.IsSIMDStackSlot());
5846 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
5847 }
5848 } else if (source.IsConstant()) {
5849 HConstant* constant = source.GetConstant();
5850 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5851 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5852 if (destination.IsRegister()) {
5853 if (value == 0) {
5854 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
5855 } else {
5856 __ movl(destination.AsRegister<Register>(), Immediate(value));
5857 }
5858 } else {
5859 DCHECK(destination.IsStackSlot()) << destination;
5860 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
5861 }
5862 } else if (constant->IsFloatConstant()) {
5863 float fp_value = constant->AsFloatConstant()->GetValue();
5864 int32_t value = bit_cast<int32_t, float>(fp_value);
5865 Immediate imm(value);
5866 if (destination.IsFpuRegister()) {
5867 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5868 if (value == 0) {
5869 // Easy handling of 0.0.
5870 __ xorps(dest, dest);
5871 } else {
5872 ScratchRegisterScope ensure_scratch(
5873 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5874 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
5875 __ movl(temp, Immediate(value));
5876 __ movd(dest, temp);
5877 }
5878 } else {
5879 DCHECK(destination.IsStackSlot()) << destination;
5880 __ movl(Address(ESP, destination.GetStackIndex()), imm);
5881 }
5882 } else if (constant->IsLongConstant()) {
5883 int64_t value = constant->AsLongConstant()->GetValue();
5884 int32_t low_value = Low32Bits(value);
5885 int32_t high_value = High32Bits(value);
5886 Immediate low(low_value);
5887 Immediate high(high_value);
5888 if (destination.IsDoubleStackSlot()) {
5889 __ movl(Address(ESP, destination.GetStackIndex()), low);
5890 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5891 } else {
5892 __ movl(destination.AsRegisterPairLow<Register>(), low);
5893 __ movl(destination.AsRegisterPairHigh<Register>(), high);
5894 }
5895 } else {
5896 DCHECK(constant->IsDoubleConstant());
5897 double dbl_value = constant->AsDoubleConstant()->GetValue();
5898 int64_t value = bit_cast<int64_t, double>(dbl_value);
5899 int32_t low_value = Low32Bits(value);
5900 int32_t high_value = High32Bits(value);
5901 Immediate low(low_value);
5902 Immediate high(high_value);
5903 if (destination.IsFpuRegister()) {
5904 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5905 if (value == 0) {
5906 // Easy handling of 0.0.
5907 __ xorpd(dest, dest);
5908 } else {
5909 __ pushl(high);
5910 __ pushl(low);
5911 __ movsd(dest, Address(ESP, 0));
5912 __ addl(ESP, Immediate(8));
5913 }
5914 } else {
5915 DCHECK(destination.IsDoubleStackSlot()) << destination;
5916 __ movl(Address(ESP, destination.GetStackIndex()), low);
5917 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
5918 }
5919 }
5920 } else {
5921 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
5922 }
5923 }
5924
Exchange(Register reg,int mem)5925 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
5926 Register suggested_scratch = reg == EAX ? EBX : EAX;
5927 ScratchRegisterScope ensure_scratch(
5928 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5929
5930 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5931 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
5932 __ movl(Address(ESP, mem + stack_offset), reg);
5933 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
5934 }
5935
Exchange32(XmmRegister reg,int mem)5936 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
5937 ScratchRegisterScope ensure_scratch(
5938 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5939
5940 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
5941 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
5942 __ movl(temp_reg, Address(ESP, mem + stack_offset));
5943 __ movss(Address(ESP, mem + stack_offset), reg);
5944 __ movd(reg, temp_reg);
5945 }
5946
Exchange128(XmmRegister reg,int mem)5947 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
5948 size_t extra_slot = 4 * kX86WordSize;
5949 __ subl(ESP, Immediate(extra_slot));
5950 __ movups(Address(ESP, 0), XmmRegister(reg));
5951 ExchangeMemory(0, mem + extra_slot, 4);
5952 __ movups(XmmRegister(reg), Address(ESP, 0));
5953 __ addl(ESP, Immediate(extra_slot));
5954 }
5955
ExchangeMemory(int mem1,int mem2,int number_of_words)5956 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
5957 ScratchRegisterScope ensure_scratch1(
5958 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
5959
5960 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
5961 ScratchRegisterScope ensure_scratch2(
5962 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
5963
5964 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
5965 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
5966
5967 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5968 for (int i = 0; i < number_of_words; i++) {
5969 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
5970 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
5971 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
5972 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
5973 stack_offset += kX86WordSize;
5974 }
5975 }
5976
EmitSwap(size_t index)5977 void ParallelMoveResolverX86::EmitSwap(size_t index) {
5978 MoveOperands* move = moves_[index];
5979 Location source = move->GetSource();
5980 Location destination = move->GetDestination();
5981
5982 if (source.IsRegister() && destination.IsRegister()) {
5983 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
5984 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
5985 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5986 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
5987 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
5988 } else if (source.IsRegister() && destination.IsStackSlot()) {
5989 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
5990 } else if (source.IsStackSlot() && destination.IsRegister()) {
5991 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
5992 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5993 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
5994 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5995 // Use XOR Swap algorithm to avoid a temporary.
5996 DCHECK_NE(source.reg(), destination.reg());
5997 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5998 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5999 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6000 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6001 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6002 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6003 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6004 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6005 // Take advantage of the 16 bytes in the XMM register.
6006 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6007 Address stack(ESP, destination.GetStackIndex());
6008 // Load the double into the high doubleword.
6009 __ movhpd(reg, stack);
6010
6011 // Store the low double into the destination.
6012 __ movsd(stack, reg);
6013
6014 // Move the high double to the low double.
6015 __ psrldq(reg, Immediate(8));
6016 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6017 // Take advantage of the 16 bytes in the XMM register.
6018 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6019 Address stack(ESP, source.GetStackIndex());
6020 // Load the double into the high doubleword.
6021 __ movhpd(reg, stack);
6022
6023 // Store the low double into the destination.
6024 __ movsd(stack, reg);
6025
6026 // Move the high double to the low double.
6027 __ psrldq(reg, Immediate(8));
6028 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6029 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6030 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6031 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6032 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6033 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6034 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6035 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6036 } else {
6037 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6038 }
6039 }
6040
SpillScratch(int reg)6041 void ParallelMoveResolverX86::SpillScratch(int reg) {
6042 __ pushl(static_cast<Register>(reg));
6043 }
6044
RestoreScratch(int reg)6045 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6046 __ popl(static_cast<Register>(reg));
6047 }
6048
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6049 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6050 HLoadClass::LoadKind desired_class_load_kind) {
6051 switch (desired_class_load_kind) {
6052 case HLoadClass::LoadKind::kInvalid:
6053 LOG(FATAL) << "UNREACHABLE";
6054 UNREACHABLE();
6055 case HLoadClass::LoadKind::kReferrersClass:
6056 break;
6057 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6058 case HLoadClass::LoadKind::kBootImageClassTable:
6059 case HLoadClass::LoadKind::kBssEntry:
6060 DCHECK(!Runtime::Current()->UseJitCompilation());
6061 break;
6062 case HLoadClass::LoadKind::kJitTableAddress:
6063 DCHECK(Runtime::Current()->UseJitCompilation());
6064 break;
6065 case HLoadClass::LoadKind::kBootImageAddress:
6066 case HLoadClass::LoadKind::kRuntimeCall:
6067 break;
6068 }
6069 return desired_class_load_kind;
6070 }
6071
VisitLoadClass(HLoadClass * cls)6072 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6073 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6074 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6075 InvokeRuntimeCallingConvention calling_convention;
6076 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6077 cls,
6078 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6079 Location::RegisterLocation(EAX));
6080 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6081 return;
6082 }
6083 DCHECK(!cls->NeedsAccessCheck());
6084
6085 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6086 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6087 ? LocationSummary::kCallOnSlowPath
6088 : LocationSummary::kNoCall;
6089 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6090 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6091 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6092 }
6093
6094 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6095 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6096 load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
6097 load_kind == HLoadClass::LoadKind::kBssEntry) {
6098 locations->SetInAt(0, Location::RequiresRegister());
6099 }
6100 locations->SetOut(Location::RequiresRegister());
6101 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6102 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6103 // Rely on the type resolution and/or initialization to save everything.
6104 RegisterSet caller_saves = RegisterSet::Empty();
6105 InvokeRuntimeCallingConvention calling_convention;
6106 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6107 locations->SetCustomSlowPathCallerSaves(caller_saves);
6108 } else {
6109 // For non-Baker read barrier we have a temp-clobbering call.
6110 }
6111 }
6112 }
6113
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)6114 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6115 dex::TypeIndex type_index,
6116 Handle<mirror::Class> handle) {
6117 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6118 // Add a patch entry and return the label.
6119 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6120 PatchInfo<Label>* info = &jit_class_patches_.back();
6121 return &info->label;
6122 }
6123
// Emits the code that materializes the class referenced by `cls` in its output register.
//
// The kRuntimeCall kind is delegated to codegen_->GenerateLoadClassRuntimeCall(). Every other
// kind is loaded inline, and a LoadClassSlowPathX86 is attached when the loaded reference has
// to be null-checked (kBssEntry) or when `cls` must also generate a class initialization check.
//
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    codegen_->GenerateLoadClassRuntimeCall(cls);
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  LocationSummary* locations = cls->GetLocations();
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();

  bool generate_null_check = false;
  // Classes reported as being in the boot image are loaded without a read barrier; all others
  // use the compiler-wide read barrier option.
  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
      ? kWithoutReadBarrier
      : kCompilerReadBarrierOption;
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      Register current_method = locations->InAt(0).AsRegister<Register>();
      GenerateGcRootFieldLoad(
          cls,
          out_loc,
          Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
          /* fixup_label */ nullptr,
          read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // The displacement is a dummy value; a boot image type patch is recorded right after the
      // instruction (presumably so that the real offset can be filled in later).
      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
      codegen_->RecordBootImageTypePatch(cls);
      break;
    }
    case HLoadClass::LoadKind::kBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // The class address is embedded directly as a 32-bit immediate.
      uint32_t address = dchecked_integral_cast<uint32_t>(
          reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
      DCHECK_NE(address, 0u);
      __ movl(out, Immediate(address));
      break;
    }
    case HLoadClass::LoadKind::kBootImageClassTable: {
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      Register method_address = locations->InAt(0).AsRegister<Register>();
      __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
      codegen_->RecordBootImageTypePatch(cls);
      // Extract the reference from the slot data, i.e. clear the hash bits.
      // The masked hash of the type descriptor is known at compile time, so it is removed with
      // a subtraction of that constant (skipped when the constant is zero).
      int32_t masked_hash = ClassTable::TableSlot::MaskHash(
          ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
      if (masked_hash != 0) {
        __ subl(out, Immediate(masked_hash));
      }
      break;
    }
    case HLoadClass::LoadKind::kBssEntry: {
      Register method_address = locations->InAt(0).AsRegister<Register>();
      Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      // The loaded entry is tested for null below; a null value goes to the slow path.
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
      Label* fixup_label = codegen_->NewJitRootClassPatch(
          cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
      // /* GcRoot<mirror::Class> */ out = *address
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kRuntimeCall:
    case HLoadClass::LoadKind::kInvalid:
      // kRuntimeCall returned early above, so neither kind can reach the switch.
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }

  if (generate_null_check || cls->MustGenerateClinitCheck()) {
    DCHECK(cls->CanCallRuntime());
    SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(
        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
    codegen_->AddSlowPath(slow_path);

    if (generate_null_check) {
      __ testl(out, out);
      __ j(kEqual, slow_path->GetEntryLabel());
    }

    // The slow path exit label must be bound exactly once: GenerateClassInitializationCheck()
    // binds it itself, so it is only bound here when no initialization check is emitted.
    if (cls->MustGenerateClinitCheck()) {
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}
6225
VisitClinitCheck(HClinitCheck * check)6226 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6227 LocationSummary* locations =
6228 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6229 locations->SetInAt(0, Location::RequiresRegister());
6230 if (check->HasUses()) {
6231 locations->SetOut(Location::SameAsFirstInput());
6232 }
6233 }
6234
VisitClinitCheck(HClinitCheck * check)6235 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6236 // We assume the class to not be null.
6237 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(
6238 check->GetLoadClass(), check, check->GetDexPc(), true);
6239 codegen_->AddSlowPath(slow_path);
6240 GenerateClassInitializationCheck(slow_path,
6241 check->GetLocations()->InAt(0).AsRegister<Register>());
6242 }
6243
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6244 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6245 SlowPathCode* slow_path, Register class_reg) {
6246 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6247 const size_t status_byte_offset =
6248 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6249 constexpr uint32_t shifted_initialized_value =
6250 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
6251
6252 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
6253 __ j(kBelow, slow_path->GetEntryLabel());
6254 __ Bind(slow_path->GetExitLabel());
6255 // No need for memory fence, thanks to the X86 memory model.
6256 }
6257
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6258 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6259 HLoadString::LoadKind desired_string_load_kind) {
6260 switch (desired_string_load_kind) {
6261 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6262 case HLoadString::LoadKind::kBootImageInternTable:
6263 case HLoadString::LoadKind::kBssEntry:
6264 DCHECK(!Runtime::Current()->UseJitCompilation());
6265 break;
6266 case HLoadString::LoadKind::kJitTableAddress:
6267 DCHECK(Runtime::Current()->UseJitCompilation());
6268 break;
6269 case HLoadString::LoadKind::kBootImageAddress:
6270 case HLoadString::LoadKind::kRuntimeCall:
6271 break;
6272 }
6273 return desired_string_load_kind;
6274 }
6275
VisitLoadString(HLoadString * load)6276 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6277 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6278 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6279 HLoadString::LoadKind load_kind = load->GetLoadKind();
6280 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6281 load_kind == HLoadString::LoadKind::kBootImageInternTable ||
6282 load_kind == HLoadString::LoadKind::kBssEntry) {
6283 locations->SetInAt(0, Location::RequiresRegister());
6284 }
6285 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
6286 locations->SetOut(Location::RegisterLocation(EAX));
6287 } else {
6288 locations->SetOut(Location::RequiresRegister());
6289 if (load_kind == HLoadString::LoadKind::kBssEntry) {
6290 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6291 // Rely on the pResolveString to save everything.
6292 RegisterSet caller_saves = RegisterSet::Empty();
6293 InvokeRuntimeCallingConvention calling_convention;
6294 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6295 locations->SetCustomSlowPathCallerSaves(caller_saves);
6296 } else {
6297 // For non-Baker read barrier we have a temp-clobbering call.
6298 }
6299 }
6300 }
6301 }
6302
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)6303 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6304 dex::StringIndex string_index,
6305 Handle<mirror::String> handle) {
6306 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6307 // Add a patch entry and return the label.
6308 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6309 PatchInfo<Label>* info = &jit_string_patches_.back();
6310 return &info->label;
6311 }
6312
6313 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6314 // move.
VisitLoadString(HLoadString * load)6315 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6316 LocationSummary* locations = load->GetLocations();
6317 Location out_loc = locations->Out();
6318 Register out = out_loc.AsRegister<Register>();
6319
6320 switch (load->GetLoadKind()) {
6321 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6322 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6323 Register method_address = locations->InAt(0).AsRegister<Register>();
6324 __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6325 codegen_->RecordBootImageStringPatch(load);
6326 return;
6327 }
6328 case HLoadString::LoadKind::kBootImageAddress: {
6329 uint32_t address = dchecked_integral_cast<uint32_t>(
6330 reinterpret_cast<uintptr_t>(load->GetString().Get()));
6331 DCHECK_NE(address, 0u);
6332 __ movl(out, Immediate(address));
6333 return;
6334 }
6335 case HLoadString::LoadKind::kBootImageInternTable: {
6336 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6337 Register method_address = locations->InAt(0).AsRegister<Register>();
6338 __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6339 codegen_->RecordBootImageStringPatch(load);
6340 return;
6341 }
6342 case HLoadString::LoadKind::kBssEntry: {
6343 Register method_address = locations->InAt(0).AsRegister<Register>();
6344 Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6345 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6346 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6347 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6348 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
6349 codegen_->AddSlowPath(slow_path);
6350 __ testl(out, out);
6351 __ j(kEqual, slow_path->GetEntryLabel());
6352 __ Bind(slow_path->GetExitLabel());
6353 return;
6354 }
6355 case HLoadString::LoadKind::kJitTableAddress: {
6356 Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6357 Label* fixup_label = codegen_->NewJitRootStringPatch(
6358 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6359 // /* GcRoot<mirror::String> */ out = *address
6360 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6361 return;
6362 }
6363 default:
6364 break;
6365 }
6366
6367 // TODO: Re-add the compiler code to do string dex cache lookup again.
6368 InvokeRuntimeCallingConvention calling_convention;
6369 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6370 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6371 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6372 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6373 }
6374
GetExceptionTlsAddress()6375 static Address GetExceptionTlsAddress() {
6376 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6377 }
6378
VisitLoadException(HLoadException * load)6379 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6380 LocationSummary* locations =
6381 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6382 locations->SetOut(Location::RequiresRegister());
6383 }
6384
VisitLoadException(HLoadException * load)6385 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6386 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6387 }
6388
VisitClearException(HClearException * clear)6389 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6390 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6391 }
6392
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)6393 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6394 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
6395 }
6396
VisitThrow(HThrow * instruction)6397 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
6398 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6399 instruction, LocationSummary::kCallOnMainOnly);
6400 InvokeRuntimeCallingConvention calling_convention;
6401 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6402 }
6403
VisitThrow(HThrow * instruction)6404 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
6405 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6406 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6407 }
6408
6409 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)6410 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6411 if (kEmitCompilerReadBarrier &&
6412 !kUseBakerReadBarrier &&
6413 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6414 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6415 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6416 return 1;
6417 }
6418 return 0;
6419 }
6420
6421 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
6422 // interface pointer, one for loading the current interface.
6423 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)6424 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6425 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6426 return 2;
6427 }
6428 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6429 }
6430
VisitInstanceOf(HInstanceOf * instruction)6431 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
6432 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6433 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6434 bool baker_read_barrier_slow_path = false;
6435 switch (type_check_kind) {
6436 case TypeCheckKind::kExactCheck:
6437 case TypeCheckKind::kAbstractClassCheck:
6438 case TypeCheckKind::kClassHierarchyCheck:
6439 case TypeCheckKind::kArrayObjectCheck: {
6440 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6441 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6442 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6443 break;
6444 }
6445 case TypeCheckKind::kArrayCheck:
6446 case TypeCheckKind::kUnresolvedCheck:
6447 case TypeCheckKind::kInterfaceCheck:
6448 call_kind = LocationSummary::kCallOnSlowPath;
6449 break;
6450 }
6451
6452 LocationSummary* locations =
6453 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6454 if (baker_read_barrier_slow_path) {
6455 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6456 }
6457 locations->SetInAt(0, Location::RequiresRegister());
6458 locations->SetInAt(1, Location::Any());
6459 // Note that TypeCheckSlowPathX86 uses this "out" register too.
6460 locations->SetOut(Location::RequiresRegister());
6461 // When read barriers are enabled, we need a temporary register for some cases.
6462 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6463 }
6464
// Emits the code for an HInstanceOf: the output register receives 1 when the inline check
// succeeds and 0 when it fails or when the object is null.
//
// The sequence depends on the instruction's TypeCheckKind. The exact, abstract-class,
// class-hierarchy and array-object checks are decided fully inline. The array check falls back
// to a TypeCheckSlowPathX86 when the inline comparison fails, and the unresolved and interface
// checks always go through that slow path.
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();
  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
  DCHECK_LE(num_temps, 1u);
  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  SlowPathCode* slow_path = nullptr;
  // `zero` is the target for paths whose result is 0 (it clears `out`); `done` is the target for
  // paths where `out` already holds the result. Both are only bound below if something jumped
  // to them.
  NearLabel done, zero;

  // Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &zero);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // `cls` was given Location::Any(), so it is either in a register or in a stack slot.
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }

      // Classes must be equal for the instanceof to succeed.
      __ j(kNotEqual, &zero);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      __ movl(out, Immediate(1));
      // Only skip over the `zero` block when that block is actually going to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Walk over the class hierarchy to find a match.
      NearLabel loop, success;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &success);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      __ j(kNotEqual, &loop);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ jmp(&done);
      __ Bind(&success);
      __ movl(out, Immediate(1));
      // Only skip over the `zero` block when that block is actually going to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Do an exact check.
      NearLabel exact_check;
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &exact_check);
      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ out = out->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       component_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      // A component type that is not Primitive::kPrimNot makes the check fail.
      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, &zero);
      __ Bind(&exact_check);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kArrayCheck: {
      // No read barrier since the slow path will retry upon failure.
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal */ false);
      codegen_->AddSlowPath(slow_path);
      // The flags tested here are still the ones set by the cmpl above.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ movl(out, Immediate(1));
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck: {
      // Note that we indeed only call on slow path, but we always go
      // into the slow path for the unresolved and interface check
      // cases.
      //
      // We cannot directly call the InstanceofNonTrivial runtime
      // entry point without resorting to a type checking slow path
      // here (i.e. by calling InvokeRuntime directly), as it would
      // require to assign fixed registers for the inputs of this
      // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
      //
      // TODO: Introduce a new runtime entry point taking the object
      // to test (instead of its class) as argument, and let it deal
      // with the read barrier issues. This will let us refactor this
      // case of the `switch` code as it was previously (with a direct
      // call to the runtime not using a type checking slow path).
      // This should also be beneficial for the other cases above.
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal */ false);
      codegen_->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      // NOTE(review): the jump above is unconditional, so the jump emitted here is never
      // executed; its visible effect is to link `done` so that it gets bound below.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }
  }

  // Result 0: only emitted when some path above (or the null check) jumps to `zero`.
  if (zero.IsLinked()) {
    __ Bind(&zero);
    __ xorl(out, out);
  }

  if (done.IsLinked()) {
    __ Bind(&done);
  }

  // The slow path, when there is one, resumes execution here.
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}
6694
VisitCheckCast(HCheckCast * instruction)6695 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
6696 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6697 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6698 LocationSummary* locations =
6699 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6700 locations->SetInAt(0, Location::RequiresRegister());
6701 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6702 // Require a register for the interface check since there is a loop that compares the class to
6703 // a memory address.
6704 locations->SetInAt(1, Location::RequiresRegister());
6705 } else {
6706 locations->SetInAt(1, Location::Any());
6707 }
6708 // Note that TypeCheckSlowPathX86 uses this "temp" register too.
6709 locations->AddTemp(Location::RequiresRegister());
6710 // When read barriers are enabled, we need an additional temporary register for some cases.
6711 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6712 }
6713
// Emits the code for an HCheckCast. Execution continues past the emitted sequence when the
// object is null (if a null check is required) or when the inline check for the instruction's
// TypeCheckKind succeeds; every other outcome branches to a TypeCheckSlowPathX86.
//
// Unlike the instanceof code, all inline class loads here use kWithoutReadBarrier.
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location temp_loc = locations->GetTemp(0);
  Register temp = temp_loc.AsRegister<Register>();
  // Temp 0 always exists; temp 1 is only read when the kind asks for two temps.
  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
  DCHECK_GE(num_temps, 1u);
  DCHECK_LE(num_temps, 2u);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
  SlowPathCode* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  // `done` is the target for paths on which the cast is known to succeed.
  NearLabel done;
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &done);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Outside the interface check, `cls` was given Location::Any(), so it is either in a
      // register or in a stack slot.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // Otherwise, compare the classes
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      // A match falls through to `done`; a mismatch moves on to the next super class.
      __ j(kNotEqual, &loop);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      NearLabel loop;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back at the beginning of the loop.
      __ testl(temp, temp);
      __ j(kNotZero, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Do an exact check.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null (i.e. the object not an array), jump to the slow path to
      // throw the exception. Otherwise proceed with the check.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // A component type that is not Primitive::kPrimNot also goes to the slow path.
      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check case.
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;

    case TypeCheckKind::kInterfaceCheck: {
      // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
      // We can not get false positives by doing this.
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        temp_loc,
                                        iftable_offset,
                                        kWithoutReadBarrier);
      // Iftable is never null.
      // The second temp holds the iftable's array length and counts down in the loop below.
      __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
      // Maybe poison the `cls` for direct comparison with memory.
      __ MaybePoisonHeapReference(cls.AsRegister<Register>());
      // Loop through the iftable and check if any class matches.
      NearLabel start_loop;
      __ Bind(&start_loop);
      // Need to subtract first to handle the empty array case.
      // The index moves by 2 per iteration (presumably because each iftable entry occupies two
      // array slots - confirm against mirror::IfTable).
      __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
      // Running out of entries without a match goes to the slow path; `cls` is still in its
      // possibly-poisoned form at that point.
      __ j(kNegative, type_check_slow_path->GetEntryLabel());
      // Go to next interface if the classes do not match.
      __ cmpl(cls.AsRegister<Register>(),
              CodeGeneratorX86::ArrayAddress(temp,
                                             maybe_temp2_loc,
                                             TIMES_4,
                                             object_array_data_offset));
      __ j(kNotEqual, &start_loop);
      // If `cls` was poisoned above, unpoison it.
      __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
      break;
    }
  }
  __ Bind(&done);

  // The slow path, when it returns, resumes execution here.
  __ Bind(type_check_slow_path->GetExitLabel());
}
6929
VisitMonitorOperation(HMonitorOperation * instruction)6930 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
6931 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6932 instruction, LocationSummary::kCallOnMainOnly);
6933 InvokeRuntimeCallingConvention calling_convention;
6934 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6935 }
6936
VisitMonitorOperation(HMonitorOperation * instruction)6937 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
6938 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
6939 : kQuickUnlockObject,
6940 instruction,
6941 instruction->GetDexPc());
6942 if (instruction->IsEnter()) {
6943 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6944 } else {
6945 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6946 }
6947 }
6948
VisitAnd(HAnd * instruction)6949 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)6950 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)6951 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6952
HandleBitwiseOperation(HBinaryOperation * instruction)6953 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
6954 LocationSummary* locations =
6955 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6956 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6957 || instruction->GetResultType() == DataType::Type::kInt64);
6958 locations->SetInAt(0, Location::RequiresRegister());
6959 locations->SetInAt(1, Location::Any());
6960 locations->SetOut(Location::SameAsFirstInput());
6961 }
6962
VisitAnd(HAnd * instruction)6963 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
6964 HandleBitwiseOperation(instruction);
6965 }
6966
VisitOr(HOr * instruction)6967 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
6968 HandleBitwiseOperation(instruction);
6969 }
6970
VisitXor(HXor * instruction)6971 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
6972 HandleBitwiseOperation(instruction);
6973 }
6974
HandleBitwiseOperation(HBinaryOperation * instruction)6975 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
6976 LocationSummary* locations = instruction->GetLocations();
6977 Location first = locations->InAt(0);
6978 Location second = locations->InAt(1);
6979 DCHECK(first.Equals(locations->Out()));
6980
6981 if (instruction->GetResultType() == DataType::Type::kInt32) {
6982 if (second.IsRegister()) {
6983 if (instruction->IsAnd()) {
6984 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
6985 } else if (instruction->IsOr()) {
6986 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
6987 } else {
6988 DCHECK(instruction->IsXor());
6989 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
6990 }
6991 } else if (second.IsConstant()) {
6992 if (instruction->IsAnd()) {
6993 __ andl(first.AsRegister<Register>(),
6994 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
6995 } else if (instruction->IsOr()) {
6996 __ orl(first.AsRegister<Register>(),
6997 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
6998 } else {
6999 DCHECK(instruction->IsXor());
7000 __ xorl(first.AsRegister<Register>(),
7001 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7002 }
7003 } else {
7004 if (instruction->IsAnd()) {
7005 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7006 } else if (instruction->IsOr()) {
7007 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7008 } else {
7009 DCHECK(instruction->IsXor());
7010 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7011 }
7012 }
7013 } else {
7014 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7015 if (second.IsRegisterPair()) {
7016 if (instruction->IsAnd()) {
7017 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7018 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7019 } else if (instruction->IsOr()) {
7020 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7021 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7022 } else {
7023 DCHECK(instruction->IsXor());
7024 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7025 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7026 }
7027 } else if (second.IsDoubleStackSlot()) {
7028 if (instruction->IsAnd()) {
7029 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7030 __ andl(first.AsRegisterPairHigh<Register>(),
7031 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7032 } else if (instruction->IsOr()) {
7033 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7034 __ orl(first.AsRegisterPairHigh<Register>(),
7035 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7036 } else {
7037 DCHECK(instruction->IsXor());
7038 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7039 __ xorl(first.AsRegisterPairHigh<Register>(),
7040 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7041 }
7042 } else {
7043 DCHECK(second.IsConstant()) << second;
7044 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7045 int32_t low_value = Low32Bits(value);
7046 int32_t high_value = High32Bits(value);
7047 Immediate low(low_value);
7048 Immediate high(high_value);
7049 Register first_low = first.AsRegisterPairLow<Register>();
7050 Register first_high = first.AsRegisterPairHigh<Register>();
7051 if (instruction->IsAnd()) {
7052 if (low_value == 0) {
7053 __ xorl(first_low, first_low);
7054 } else if (low_value != -1) {
7055 __ andl(first_low, low);
7056 }
7057 if (high_value == 0) {
7058 __ xorl(first_high, first_high);
7059 } else if (high_value != -1) {
7060 __ andl(first_high, high);
7061 }
7062 } else if (instruction->IsOr()) {
7063 if (low_value != 0) {
7064 __ orl(first_low, low);
7065 }
7066 if (high_value != 0) {
7067 __ orl(first_high, high);
7068 }
7069 } else {
7070 DCHECK(instruction->IsXor());
7071 if (low_value != 0) {
7072 __ xorl(first_low, low);
7073 }
7074 if (high_value != 0) {
7075 __ xorl(first_high, high);
7076 }
7077 }
7078 }
7079 }
7080 }
7081
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7082 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7083 HInstruction* instruction,
7084 Location out,
7085 uint32_t offset,
7086 Location maybe_temp,
7087 ReadBarrierOption read_barrier_option) {
7088 Register out_reg = out.AsRegister<Register>();
7089 if (read_barrier_option == kWithReadBarrier) {
7090 CHECK(kEmitCompilerReadBarrier);
7091 if (kUseBakerReadBarrier) {
7092 // Load with fast path based Baker's read barrier.
7093 // /* HeapReference<Object> */ out = *(out + offset)
7094 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7095 instruction, out, out_reg, offset, /* needs_null_check */ false);
7096 } else {
7097 // Load with slow path based read barrier.
7098 // Save the value of `out` into `maybe_temp` before overwriting it
7099 // in the following move operation, as we will need it for the
7100 // read barrier below.
7101 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7102 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7103 // /* HeapReference<Object> */ out = *(out + offset)
7104 __ movl(out_reg, Address(out_reg, offset));
7105 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7106 }
7107 } else {
7108 // Plain load with no read barrier.
7109 // /* HeapReference<Object> */ out = *(out + offset)
7110 __ movl(out_reg, Address(out_reg, offset));
7111 __ MaybeUnpoisonHeapReference(out_reg);
7112 }
7113 }
7114
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7115 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7116 HInstruction* instruction,
7117 Location out,
7118 Location obj,
7119 uint32_t offset,
7120 ReadBarrierOption read_barrier_option) {
7121 Register out_reg = out.AsRegister<Register>();
7122 Register obj_reg = obj.AsRegister<Register>();
7123 if (read_barrier_option == kWithReadBarrier) {
7124 CHECK(kEmitCompilerReadBarrier);
7125 if (kUseBakerReadBarrier) {
7126 // Load with fast path based Baker's read barrier.
7127 // /* HeapReference<Object> */ out = *(obj + offset)
7128 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7129 instruction, out, obj_reg, offset, /* needs_null_check */ false);
7130 } else {
7131 // Load with slow path based read barrier.
7132 // /* HeapReference<Object> */ out = *(obj + offset)
7133 __ movl(out_reg, Address(obj_reg, offset));
7134 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7135 }
7136 } else {
7137 // Plain load with no read barrier.
7138 // /* HeapReference<Object> */ out = *(obj + offset)
7139 __ movl(out_reg, Address(obj_reg, offset));
7140 __ MaybeUnpoisonHeapReference(out_reg);
7141 }
7142 }
7143
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)7144 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7145 HInstruction* instruction,
7146 Location root,
7147 const Address& address,
7148 Label* fixup_label,
7149 ReadBarrierOption read_barrier_option) {
7150 Register root_reg = root.AsRegister<Register>();
7151 if (read_barrier_option == kWithReadBarrier) {
7152 DCHECK(kEmitCompilerReadBarrier);
7153 if (kUseBakerReadBarrier) {
7154 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7155 // Baker's read barrier are used:
7156 //
7157 // root = obj.field;
7158 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7159 // if (temp != null) {
7160 // root = temp(root)
7161 // }
7162
7163 // /* GcRoot<mirror::Object> */ root = *address
7164 __ movl(root_reg, address);
7165 if (fixup_label != nullptr) {
7166 __ Bind(fixup_label);
7167 }
7168 static_assert(
7169 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7170 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7171 "have different sizes.");
7172 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7173 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7174 "have different sizes.");
7175
7176 // Slow path marking the GC root `root`.
7177 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7178 instruction, root, /* unpoison_ref_before_marking */ false);
7179 codegen_->AddSlowPath(slow_path);
7180
7181 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7182 const int32_t entry_point_offset =
7183 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7184 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
7185 // The entrypoint is null when the GC is not marking.
7186 __ j(kNotEqual, slow_path->GetEntryLabel());
7187 __ Bind(slow_path->GetExitLabel());
7188 } else {
7189 // GC root loaded through a slow path for read barriers other
7190 // than Baker's.
7191 // /* GcRoot<mirror::Object>* */ root = address
7192 __ leal(root_reg, address);
7193 if (fixup_label != nullptr) {
7194 __ Bind(fixup_label);
7195 }
7196 // /* mirror::Object* */ root = root->Read()
7197 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7198 }
7199 } else {
7200 // Plain GC root load with no read barrier.
7201 // /* GcRoot<mirror::Object> */ root = *address
7202 __ movl(root_reg, address);
7203 if (fixup_label != nullptr) {
7204 __ Bind(fixup_label);
7205 }
7206 // Note that GC roots are not affected by heap poisoning, thus we
7207 // do not have to unpoison `root_reg` here.
7208 }
7209 }
7210
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)7211 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7212 Location ref,
7213 Register obj,
7214 uint32_t offset,
7215 bool needs_null_check) {
7216 DCHECK(kEmitCompilerReadBarrier);
7217 DCHECK(kUseBakerReadBarrier);
7218
7219 // /* HeapReference<Object> */ ref = *(obj + offset)
7220 Address src(obj, offset);
7221 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7222 }
7223
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)7224 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7225 Location ref,
7226 Register obj,
7227 uint32_t data_offset,
7228 Location index,
7229 bool needs_null_check) {
7230 DCHECK(kEmitCompilerReadBarrier);
7231 DCHECK(kUseBakerReadBarrier);
7232
7233 static_assert(
7234 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7235 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7236 // /* HeapReference<Object> */ ref =
7237 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7238 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
7239 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7240 }
7241
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,const Address & src,bool needs_null_check,bool always_update_field,Register * temp)7242 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7243 Location ref,
7244 Register obj,
7245 const Address& src,
7246 bool needs_null_check,
7247 bool always_update_field,
7248 Register* temp) {
7249 DCHECK(kEmitCompilerReadBarrier);
7250 DCHECK(kUseBakerReadBarrier);
7251
7252 // In slow path based read barriers, the read barrier call is
7253 // inserted after the original load. However, in fast path based
7254 // Baker's read barriers, we need to perform the load of
7255 // mirror::Object::monitor_ *before* the original reference load.
7256 // This load-load ordering is required by the read barrier.
7257 // The fast path/slow path (for Baker's algorithm) should look like:
7258 //
7259 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7260 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7261 // HeapReference<Object> ref = *src; // Original reference load.
7262 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7263 // if (is_gray) {
7264 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7265 // }
7266 //
7267 // Note: the original implementation in ReadBarrier::Barrier is
7268 // slightly more complex as:
7269 // - it implements the load-load fence using a data dependency on
7270 // the high-bits of rb_state, which are expected to be all zeroes
7271 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
7272 // which is a no-op thanks to the x86 memory model);
7273 // - it performs additional checks that we do not do here for
7274 // performance reasons.
7275
7276 Register ref_reg = ref.AsRegister<Register>();
7277 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7278
7279 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7280 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
7281 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7282 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7283 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7284 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
7285
7286 // if (rb_state == ReadBarrier::GrayState())
7287 // ref = ReadBarrier::Mark(ref);
7288 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7289 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7290 if (needs_null_check) {
7291 MaybeRecordImplicitNullCheck(instruction);
7292 }
7293
7294 // Load fence to prevent load-load reordering.
7295 // Note that this is a no-op, thanks to the x86 memory model.
7296 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7297
7298 // The actual reference load.
7299 // /* HeapReference<Object> */ ref = *src
7300 __ movl(ref_reg, src); // Flags are unaffected.
7301
7302 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7303 // Slow path marking the object `ref` when it is gray.
7304 SlowPathCode* slow_path;
7305 if (always_update_field) {
7306 DCHECK(temp != nullptr);
7307 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
7308 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
7309 } else {
7310 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7311 instruction, ref, /* unpoison_ref_before_marking */ true);
7312 }
7313 AddSlowPath(slow_path);
7314
7315 // We have done the "if" of the gray bit check above, now branch based on the flags.
7316 __ j(kNotZero, slow_path->GetEntryLabel());
7317
7318 // Object* ref = ref_addr->AsMirrorPtr()
7319 __ MaybeUnpoisonHeapReference(ref_reg);
7320
7321 __ Bind(slow_path->GetExitLabel());
7322 }
7323
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)7324 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
7325 Location out,
7326 Location ref,
7327 Location obj,
7328 uint32_t offset,
7329 Location index) {
7330 DCHECK(kEmitCompilerReadBarrier);
7331
7332 // Insert a slow path based read barrier *after* the reference load.
7333 //
7334 // If heap poisoning is enabled, the unpoisoning of the loaded
7335 // reference will be carried out by the runtime within the slow
7336 // path.
7337 //
7338 // Note that `ref` currently does not get unpoisoned (when heap
7339 // poisoning is enabled), which is alright as the `ref` argument is
7340 // not used by the artReadBarrierSlow entry point.
7341 //
7342 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7343 SlowPathCode* slow_path = new (GetScopedAllocator())
7344 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
7345 AddSlowPath(slow_path);
7346
7347 __ jmp(slow_path->GetEntryLabel());
7348 __ Bind(slow_path->GetExitLabel());
7349 }
7350
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)7351 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7352 Location out,
7353 Location ref,
7354 Location obj,
7355 uint32_t offset,
7356 Location index) {
7357 if (kEmitCompilerReadBarrier) {
7358 // Baker's read barriers shall be handled by the fast path
7359 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
7360 DCHECK(!kUseBakerReadBarrier);
7361 // If heap poisoning is enabled, unpoisoning will be taken care of
7362 // by the runtime within the slow path.
7363 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7364 } else if (kPoisonHeapReferences) {
7365 __ UnpoisonHeapReference(out.AsRegister<Register>());
7366 }
7367 }
7368
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)7369 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7370 Location out,
7371 Location root) {
7372 DCHECK(kEmitCompilerReadBarrier);
7373
7374 // Insert a slow path based read barrier *after* the GC root load.
7375 //
7376 // Note that GC roots are not affected by heap poisoning, so we do
7377 // not need to do anything special for this here.
7378 SlowPathCode* slow_path =
7379 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
7380 AddSlowPath(slow_path);
7381
7382 __ jmp(slow_path->GetEntryLabel());
7383 __ Bind(slow_path->GetExitLabel());
7384 }
7385
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)7386 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7387 // Nothing to do, this should be removed during prepare for register allocator.
7388 LOG(FATAL) << "Unreachable";
7389 }
7390
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)7391 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7392 // Nothing to do, this should be removed during prepare for register allocator.
7393 LOG(FATAL) << "Unreachable";
7394 }
7395
7396 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)7397 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7398 LocationSummary* locations =
7399 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7400 locations->SetInAt(0, Location::RequiresRegister());
7401 }
7402
GenPackedSwitchWithCompares(Register value_reg,int32_t lower_bound,uint32_t num_entries,HBasicBlock * switch_block,HBasicBlock * default_block)7403 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
7404 int32_t lower_bound,
7405 uint32_t num_entries,
7406 HBasicBlock* switch_block,
7407 HBasicBlock* default_block) {
7408 // Figure out the correct compare values and jump conditions.
7409 // Handle the first compare/branch as a special case because it might
7410 // jump to the default case.
7411 DCHECK_GT(num_entries, 2u);
7412 Condition first_condition;
7413 uint32_t index;
7414 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
7415 if (lower_bound != 0) {
7416 first_condition = kLess;
7417 __ cmpl(value_reg, Immediate(lower_bound));
7418 __ j(first_condition, codegen_->GetLabelOf(default_block));
7419 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7420
7421 index = 1;
7422 } else {
7423 // Handle all the compare/jumps below.
7424 first_condition = kBelow;
7425 index = 0;
7426 }
7427
7428 // Handle the rest of the compare/jumps.
7429 for (; index + 1 < num_entries; index += 2) {
7430 int32_t compare_to_value = lower_bound + index + 1;
7431 __ cmpl(value_reg, Immediate(compare_to_value));
7432 // Jump to successors[index] if value < case_value[index].
7433 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7434 // Jump to successors[index + 1] if value == case_value[index + 1].
7435 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7436 }
7437
7438 if (index != num_entries) {
7439 // There are an odd number of entries. Handle the last one.
7440 DCHECK_EQ(index + 1, num_entries);
7441 __ cmpl(value_reg, Immediate(lower_bound + index));
7442 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7443 }
7444
7445 // And the default for any other value.
7446 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
7447 __ jmp(codegen_->GetLabelOf(default_block));
7448 }
7449 }
7450
VisitPackedSwitch(HPackedSwitch * switch_instr)7451 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7452 int32_t lower_bound = switch_instr->GetStartValue();
7453 uint32_t num_entries = switch_instr->GetNumEntries();
7454 LocationSummary* locations = switch_instr->GetLocations();
7455 Register value_reg = locations->InAt(0).AsRegister<Register>();
7456
7457 GenPackedSwitchWithCompares(value_reg,
7458 lower_bound,
7459 num_entries,
7460 switch_instr->GetBlock(),
7461 switch_instr->GetDefaultBlock());
7462 }
7463
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)7464 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
7465 LocationSummary* locations =
7466 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7467 locations->SetInAt(0, Location::RequiresRegister());
7468
7469 // Constant area pointer.
7470 locations->SetInAt(1, Location::RequiresRegister());
7471
7472 // And the temporary we need.
7473 locations->AddTemp(Location::RequiresRegister());
7474 }
7475
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)7476 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
7477 int32_t lower_bound = switch_instr->GetStartValue();
7478 uint32_t num_entries = switch_instr->GetNumEntries();
7479 LocationSummary* locations = switch_instr->GetLocations();
7480 Register value_reg = locations->InAt(0).AsRegister<Register>();
7481 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7482
7483 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7484 GenPackedSwitchWithCompares(value_reg,
7485 lower_bound,
7486 num_entries,
7487 switch_instr->GetBlock(),
7488 default_block);
7489 return;
7490 }
7491
7492 // Optimizing has a jump area.
7493 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
7494 Register constant_area = locations->InAt(1).AsRegister<Register>();
7495
7496 // Remove the bias, if needed.
7497 if (lower_bound != 0) {
7498 __ leal(temp_reg, Address(value_reg, -lower_bound));
7499 value_reg = temp_reg;
7500 }
7501
7502 // Is the value in range?
7503 DCHECK_GE(num_entries, 1u);
7504 __ cmpl(value_reg, Immediate(num_entries - 1));
7505 __ j(kAbove, codegen_->GetLabelOf(default_block));
7506
7507 // We are in the range of the table.
7508 // Load (target-constant_area) from the jump table, indexing by the value.
7509 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
7510
7511 // Compute the actual target address by adding in constant_area.
7512 __ addl(temp_reg, constant_area);
7513
7514 // And jump.
7515 __ jmp(temp_reg);
7516 }
7517
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)7518 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
7519 HX86ComputeBaseMethodAddress* insn) {
7520 LocationSummary* locations =
7521 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
7522 locations->SetOut(Location::RequiresRegister());
7523 }
7524
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)7525 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
7526 HX86ComputeBaseMethodAddress* insn) {
7527 LocationSummary* locations = insn->GetLocations();
7528 Register reg = locations->Out().AsRegister<Register>();
7529
7530 // Generate call to next instruction.
7531 Label next_instruction;
7532 __ call(&next_instruction);
7533 __ Bind(&next_instruction);
7534
7535 // Remember this offset for later use with constant area.
7536 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
7537
7538 // Grab the return address off the stack.
7539 __ popl(reg);
7540 }
7541
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)7542 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
7543 HX86LoadFromConstantTable* insn) {
7544 LocationSummary* locations =
7545 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
7546
7547 locations->SetInAt(0, Location::RequiresRegister());
7548 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
7549
7550 // If we don't need to be materialized, we only need the inputs to be set.
7551 if (insn->IsEmittedAtUseSite()) {
7552 return;
7553 }
7554
7555 switch (insn->GetType()) {
7556 case DataType::Type::kFloat32:
7557 case DataType::Type::kFloat64:
7558 locations->SetOut(Location::RequiresFpuRegister());
7559 break;
7560
7561 case DataType::Type::kInt32:
7562 locations->SetOut(Location::RequiresRegister());
7563 break;
7564
7565 default:
7566 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
7567 }
7568 }
7569
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)7570 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
7571 if (insn->IsEmittedAtUseSite()) {
7572 return;
7573 }
7574
7575 LocationSummary* locations = insn->GetLocations();
7576 Location out = locations->Out();
7577 Register const_area = locations->InAt(0).AsRegister<Register>();
7578 HConstant *value = insn->GetConstant();
7579
7580 switch (insn->GetType()) {
7581 case DataType::Type::kFloat32:
7582 __ movss(out.AsFpuRegister<XmmRegister>(),
7583 codegen_->LiteralFloatAddress(
7584 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7585 break;
7586
7587 case DataType::Type::kFloat64:
7588 __ movsd(out.AsFpuRegister<XmmRegister>(),
7589 codegen_->LiteralDoubleAddress(
7590 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7591 break;
7592
7593 case DataType::Type::kInt32:
7594 __ movl(out.AsRegister<Register>(),
7595 codegen_->LiteralInt32Address(
7596 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
7597 break;
7598
7599 default:
7600 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
7601 }
7602 }
7603
7604 /**
7605 * Class to handle late fixup of offsets into constant area.
7606 */
7607 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7608 public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)7609 RIPFixup(CodeGeneratorX86& codegen,
7610 HX86ComputeBaseMethodAddress* base_method_address,
7611 size_t offset)
7612 : codegen_(&codegen),
7613 base_method_address_(base_method_address),
7614 offset_into_constant_area_(offset) {}
7615
7616 protected:
SetOffset(size_t offset)7617 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7618
7619 CodeGeneratorX86* codegen_;
7620 HX86ComputeBaseMethodAddress* base_method_address_;
7621
7622 private:
Process(const MemoryRegion & region,int pos)7623 void Process(const MemoryRegion& region, int pos) OVERRIDE {
7624 // Patch the correct offset for the instruction. The place to patch is the
7625 // last 4 bytes of the instruction.
7626 // The value to patch is the distance from the offset in the constant area
7627 // from the address computed by the HX86ComputeBaseMethodAddress instruction.
7628 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7629 int32_t relative_position =
7630 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
7631
7632 // Patch in the right value.
7633 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7634 }
7635
7636 // Location in constant area that the fixup refers to.
7637 int32_t offset_into_constant_area_;
7638 };
7639
7640 /**
7641 * Class to handle late fixup of offsets to a jump table that will be created in the
7642 * constant area.
7643 */
7644 class JumpTableRIPFixup : public RIPFixup {
7645 public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)7646 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
7647 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
7648 switch_instr_(switch_instr) {}
7649
CreateJumpTable()7650 void CreateJumpTable() {
7651 X86Assembler* assembler = codegen_->GetAssembler();
7652
7653 // Ensure that the reference to the jump table has the correct offset.
7654 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7655 SetOffset(offset_in_constant_table);
7656
7657 // The label values in the jump table are computed relative to the
7658 // instruction addressing the constant area.
7659 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
7660
7661 // Populate the jump table with the correct values for the jump table.
7662 int32_t num_entries = switch_instr_->GetNumEntries();
7663 HBasicBlock* block = switch_instr_->GetBlock();
7664 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7665 // The value that we want is the target offset - the position of the table.
7666 for (int32_t i = 0; i < num_entries; i++) {
7667 HBasicBlock* b = successors[i];
7668 Label* l = codegen_->GetLabelOf(b);
7669 DCHECK(l->IsBound());
7670 int32_t offset_to_block = l->Position() - relative_offset;
7671 assembler->AppendInt32(offset_to_block);
7672 }
7673 }
7674
7675 private:
7676 const HX86PackedSwitch* switch_instr_;
7677 };
7678
Finalize(CodeAllocator * allocator)7679 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
7680 // Generate the constant area if needed.
7681 X86Assembler* assembler = GetAssembler();
7682 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7683 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
7684 // byte values.
7685 assembler->Align(4, 0);
7686 constant_area_start_ = assembler->CodeSize();
7687
7688 // Populate any jump tables.
7689 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7690 jump_table->CreateJumpTable();
7691 }
7692
7693 // And now add the constant area to the generated code.
7694 assembler->AddConstantArea();
7695 }
7696
7697 // And finish up.
7698 CodeGenerator::Finalize(allocator);
7699 }
7700
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)7701 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
7702 HX86ComputeBaseMethodAddress* method_base,
7703 Register reg) {
7704 AssemblerFixup* fixup =
7705 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
7706 return Address(reg, kDummy32BitOffset, fixup);
7707 }
7708
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)7709 Address CodeGeneratorX86::LiteralFloatAddress(float v,
7710 HX86ComputeBaseMethodAddress* method_base,
7711 Register reg) {
7712 AssemblerFixup* fixup =
7713 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
7714 return Address(reg, kDummy32BitOffset, fixup);
7715 }
7716
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)7717 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
7718 HX86ComputeBaseMethodAddress* method_base,
7719 Register reg) {
7720 AssemblerFixup* fixup =
7721 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
7722 return Address(reg, kDummy32BitOffset, fixup);
7723 }
7724
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)7725 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
7726 HX86ComputeBaseMethodAddress* method_base,
7727 Register reg) {
7728 AssemblerFixup* fixup =
7729 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
7730 return Address(reg, kDummy32BitOffset, fixup);
7731 }
7732
Load32BitValue(Register dest,int32_t value)7733 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
7734 if (value == 0) {
7735 __ xorl(dest, dest);
7736 } else {
7737 __ movl(dest, Immediate(value));
7738 }
7739 }
7740
Compare32BitValue(Register dest,int32_t value)7741 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
7742 if (value == 0) {
7743 __ testl(dest, dest);
7744 } else {
7745 __ cmpl(dest, Immediate(value));
7746 }
7747 }
7748
GenerateIntCompare(Location lhs,Location rhs)7749 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
7750 Register lhs_reg = lhs.AsRegister<Register>();
7751 GenerateIntCompare(lhs_reg, rhs);
7752 }
7753
GenerateIntCompare(Register lhs,Location rhs)7754 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
7755 if (rhs.IsConstant()) {
7756 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7757 Compare32BitValue(lhs, value);
7758 } else if (rhs.IsStackSlot()) {
7759 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
7760 } else {
7761 __ cmpl(lhs, rhs.AsRegister<Register>());
7762 }
7763 }
7764
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)7765 Address CodeGeneratorX86::ArrayAddress(Register obj,
7766 Location index,
7767 ScaleFactor scale,
7768 uint32_t data_offset) {
7769 return index.IsConstant() ?
7770 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7771 Address(obj, index.AsRegister<Register>(), scale, data_offset);
7772 }
7773
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)7774 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
7775 Register reg,
7776 Register value) {
7777 // Create a fixup to be used to create and address the jump table.
7778 JumpTableRIPFixup* table_fixup =
7779 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7780
7781 // We have to populate the jump tables.
7782 fixups_to_jump_tables_.push_back(table_fixup);
7783
7784 // We want a scaled address, as we are extracting the correct offset from the table.
7785 return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
7786 }
7787
7788 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)7789 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
7790 if (!target.IsValid()) {
7791 DCHECK_EQ(type, DataType::Type::kVoid);
7792 return;
7793 }
7794
7795 DCHECK_NE(type, DataType::Type::kVoid);
7796
7797 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
7798 if (target.Equals(return_loc)) {
7799 return;
7800 }
7801
7802 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
7803 // with the else branch.
7804 if (type == DataType::Type::kInt64) {
7805 HParallelMove parallel_move(GetGraph()->GetAllocator());
7806 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
7807 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
7808 GetMoveResolver()->EmitNativeCode(¶llel_move);
7809 } else {
7810 // Let the parallel move resolver take care of all of this.
7811 HParallelMove parallel_move(GetGraph()->GetAllocator());
7812 parallel_move.AddMove(return_loc, target, type, nullptr);
7813 GetMoveResolver()->EmitNativeCode(¶llel_move);
7814 }
7815 }
7816
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const7817 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
7818 const uint8_t* roots_data,
7819 const PatchInfo<Label>& info,
7820 uint64_t index_in_table) const {
7821 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7822 uintptr_t address =
7823 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7824 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
7825 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7826 dchecked_integral_cast<uint32_t>(address);
7827 }
7828
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)7829 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7830 for (const PatchInfo<Label>& info : jit_string_patches_) {
7831 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
7832 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7833 PatchJitRootUse(code, roots_data, info, index_in_table);
7834 }
7835
7836 for (const PatchInfo<Label>& info : jit_class_patches_) {
7837 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
7838 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7839 PatchJitRootUse(code, roots_data, info, index_in_table);
7840 }
7841 }
7842
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)7843 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
7844 ATTRIBUTE_UNUSED) {
7845 LOG(FATAL) << "Unreachable";
7846 }
7847
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)7848 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
7849 ATTRIBUTE_UNUSED) {
7850 LOG(FATAL) << "Unreachable";
7851 }
7852
7853 #undef __
7854
7855 } // namespace x86
7856 } // namespace art
7857