1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "compiled_method.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "gc/accounting/card_table.h"
27 #include "gc/space/image_space.h"
28 #include "heap_poisoning.h"
29 #include "interpreter/mterp/nterp.h"
30 #include "intrinsics.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/object_reference.h"
39 #include "mirror/var_handle.h"
40 #include "scoped_thread_state_change-inl.h"
41 #include "thread.h"
42 #include "utils/assembler.h"
43 #include "utils/stack_checks.h"
44 #include "utils/x86_64/assembler_x86_64.h"
45 #include "utils/x86_64/constants_x86_64.h"
46 #include "utils/x86_64/managed_register_x86_64.h"
47 
48 namespace art {
49 
50 template<class MirrorType>
51 class GcRoot;
52 
53 namespace x86_64 {
54 
55 static constexpr int kCurrentMethodStackOffset = 0;
56 static constexpr Register kMethodRegisterArgument = RDI;
57 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
58 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
59 // generates less code/data with a small num_entries.
60 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
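// For illustration of the trade-off above: at the threshold of 5 entries the compare/jump
// sequence costs roughly 1.5 * 5 = 7.5 instructions, about the same as the 7 instructions of
// the jump table version before counting its 5 jump-table literals, so switches below the
// threshold use the compare/jump sequence.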
61 
62 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
63 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
64 
65 static constexpr int kC2ConditionMask = 0x400;
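// Note: 0x400 is bit 10 of the x87 FPU status word, i.e. the C2 condition flag, which is
// typically tested after FPREM/FPREM1 to detect an incomplete partial remainder.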
66 
67 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
68   // Custom calling convention: RAX serves as both input and output.
69   RegisterSet caller_saves = RegisterSet::Empty();
70   caller_saves.Add(Location::RegisterLocation(RAX));
71   return caller_saves;
72 }
73 
74 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
75 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
76 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
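// Shorthand used by the slow paths below: `__` expands to the x86-64 assembler of the codegen
// in scope, and QUICK_ENTRY_POINT(x) yields the 32-bit Thread offset of the quick entrypoint
// `x` for the 64-bit pointer size.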
77 
78 class NullCheckSlowPathX86_64 : public SlowPathCode {
79  public:
80   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
81 
82   void EmitNativeCode(CodeGenerator* codegen) override {
83     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
84     __ Bind(GetEntryLabel());
85     if (instruction_->CanThrowIntoCatchBlock()) {
86       // Live registers will be restored in the catch block if caught.
87       SaveLiveRegisters(codegen, instruction_->GetLocations());
88     }
89     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
90                                   instruction_,
91                                   instruction_->GetDexPc(),
92                                   this);
93     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
94   }
95 
96   bool IsFatal() const override { return true; }
97 
98   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
99 
100  private:
101   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
102 };
103 
104 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
105  public:
106   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
107 
108   void EmitNativeCode(CodeGenerator* codegen) override {
109     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
110     __ Bind(GetEntryLabel());
111     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
112     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
113   }
114 
115   bool IsFatal() const override { return true; }
116 
117   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
118 
119  private:
120   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
121 };
122 
123 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
124  public:
125   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
126       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
127 
128   void EmitNativeCode(CodeGenerator* codegen) override {
129     __ Bind(GetEntryLabel());
130     if (type_ == DataType::Type::kInt32) {
131       if (is_div_) {
132         __ negl(cpu_reg_);
133       } else {
134         __ xorl(cpu_reg_, cpu_reg_);
135       }
136 
137     } else {
138       DCHECK_EQ(DataType::Type::kInt64, type_);
139       if (is_div_) {
140         __ negq(cpu_reg_);
141       } else {
142         __ xorl(cpu_reg_, cpu_reg_);
143       }
144     }
145     __ jmp(GetExitLabel());
146   }
147 
148   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
149 
150  private:
151   const CpuRegister cpu_reg_;
152   const DataType::Type type_;
153   const bool is_div_;
154   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
155 };
156 
157 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
158  public:
159   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
160       : SlowPathCode(instruction), successor_(successor) {}
161 
162   void EmitNativeCode(CodeGenerator* codegen) override {
163     LocationSummary* locations = instruction_->GetLocations();
164     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
165     __ Bind(GetEntryLabel());
166     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
167     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
168     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
169     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
170     if (successor_ == nullptr) {
171       __ jmp(GetReturnLabel());
172     } else {
173       __ jmp(x86_64_codegen->GetLabelOf(successor_));
174     }
175   }
176 
177   Label* GetReturnLabel() {
178     DCHECK(successor_ == nullptr);
179     return &return_label_;
180   }
181 
182   HBasicBlock* GetSuccessor() const {
183     return successor_;
184   }
185 
186   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
187 
188  private:
189   HBasicBlock* const successor_;
190   Label return_label_;
191 
192   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
193 };
194 
195 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
196  public:
197   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
198     : SlowPathCode(instruction) {}
199 
200   void EmitNativeCode(CodeGenerator* codegen) override {
201     LocationSummary* locations = instruction_->GetLocations();
202     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
203     __ Bind(GetEntryLabel());
204     if (instruction_->CanThrowIntoCatchBlock()) {
205       // Live registers will be restored in the catch block if caught.
206       SaveLiveRegisters(codegen, locations);
207     }
208 
209     Location index_loc = locations->InAt(0);
210     Location length_loc = locations->InAt(1);
211     InvokeRuntimeCallingConvention calling_convention;
212     Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
213     Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
214 
215     // Are we using an array length from memory?
216     if (!length_loc.IsValid()) {
217       DCHECK(instruction_->InputAt(1)->IsArrayLength());
218       HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
219       DCHECK(array_length->IsEmittedAtUseSite());
220       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
221       Location array_loc = array_length->GetLocations()->InAt(0);
222       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
223       if (!index_loc.Equals(length_arg)) {
224         // The index is not clobbered by loading the length directly to `length_arg`.
225         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
226         x86_64_codegen->Move(index_arg, index_loc);
227       } else if (!array_loc.Equals(index_arg)) {
228         // The array reference is not clobbered by the index move.
229         x86_64_codegen->Move(index_arg, index_loc);
230         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
231       } else {
232         // Load the array length into `TMP`.
233         DCHECK(codegen->IsBlockedCoreRegister(TMP));
234         __ movl(CpuRegister(TMP), array_len);
235         // Single move to CPU register does not clobber `TMP`.
236         x86_64_codegen->Move(index_arg, index_loc);
237         __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
238       }
239       if (mirror::kUseStringCompression && array_length->IsStringLength()) {
240         __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
241       }
242     } else {
243       // We're moving two locations to locations that could overlap,
244       // so we need a parallel move resolver.
245       codegen->EmitParallelMoves(
246           index_loc,
247           index_arg,
248           DataType::Type::kInt32,
249           length_loc,
250           length_arg,
251           DataType::Type::kInt32);
252     }
253 
254     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
255         ? kQuickThrowStringBounds
256         : kQuickThrowArrayBounds;
257     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
258     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
259     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
260   }
261 
262   bool IsFatal() const override { return true; }
263 
264   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
265 
266  private:
267   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
268 };
269 
270 class LoadClassSlowPathX86_64 : public SlowPathCode {
271  public:
272   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
273       : SlowPathCode(at), cls_(cls) {
274     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
275     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
276   }
277 
278   void EmitNativeCode(CodeGenerator* codegen) override {
279     LocationSummary* locations = instruction_->GetLocations();
280     Location out = locations->Out();
281     const uint32_t dex_pc = instruction_->GetDexPc();
282     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
283     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
284 
285     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
286     __ Bind(GetEntryLabel());
287     SaveLiveRegisters(codegen, locations);
288 
289     // Custom calling convention: RAX serves as both input and output.
290     if (must_resolve_type) {
291       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
292              x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
293              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
294                              &cls_->GetDexFile()));
295       dex::TypeIndex type_index = cls_->GetTypeIndex();
296       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
297       if (cls_->NeedsAccessCheck()) {
298         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
299         x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
300       } else {
301         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
302         x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
303       }
304       // If we also must_do_clinit, the resolved type is now in the correct register.
305     } else {
306       DCHECK(must_do_clinit);
307       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
308       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
309     }
310     if (must_do_clinit) {
311       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
312       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
313     }
314 
315     // Move the class to the desired location.
316     if (out.IsValid()) {
317       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
318       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
319     }
320 
321     RestoreLiveRegisters(codegen, locations);
322     __ jmp(GetExitLabel());
323   }
324 
325   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
326 
327  private:
328   // The class this slow path will load.
329   HLoadClass* const cls_;
330 
331   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
332 };
333 
334 class LoadStringSlowPathX86_64 : public SlowPathCode {
335  public:
336   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
337 
338   void EmitNativeCode(CodeGenerator* codegen) override {
339     LocationSummary* locations = instruction_->GetLocations();
340     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
341 
342     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
343     __ Bind(GetEntryLabel());
344     SaveLiveRegisters(codegen, locations);
345 
346     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
347     // Custom calling convention: RAX serves as both input and output.
348     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
349     x86_64_codegen->InvokeRuntime(kQuickResolveString,
350                                   instruction_,
351                                   instruction_->GetDexPc(),
352                                   this);
353     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
354     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
355     RestoreLiveRegisters(codegen, locations);
356 
357     __ jmp(GetExitLabel());
358   }
359 
360   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
361 
362  private:
363   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
364 };
365 
366 class TypeCheckSlowPathX86_64 : public SlowPathCode {
367  public:
368   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
369       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
370 
371   void EmitNativeCode(CodeGenerator* codegen) override {
372     LocationSummary* locations = instruction_->GetLocations();
373     uint32_t dex_pc = instruction_->GetDexPc();
374     DCHECK(instruction_->IsCheckCast()
375            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376 
377     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378     __ Bind(GetEntryLabel());
379 
380     if (kPoisonHeapReferences &&
381         instruction_->IsCheckCast() &&
382         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
383       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
384       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
385     }
386 
387     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
388       SaveLiveRegisters(codegen, locations);
389     }
390 
391     // We're moving two locations to locations that could overlap, so we need a parallel
392     // move resolver.
393     InvokeRuntimeCallingConvention calling_convention;
394     codegen->EmitParallelMoves(locations->InAt(0),
395                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
396                                DataType::Type::kReference,
397                                locations->InAt(1),
398                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
399                                DataType::Type::kReference);
400     if (instruction_->IsInstanceOf()) {
401       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
402       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
403     } else {
404       DCHECK(instruction_->IsCheckCast());
405       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
406       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
407     }
408 
409     if (!is_fatal_) {
410       if (instruction_->IsInstanceOf()) {
411         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
412       }
413 
414       RestoreLiveRegisters(codegen, locations);
415       __ jmp(GetExitLabel());
416     }
417   }
418 
419   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
420 
421   bool IsFatal() const override { return is_fatal_; }
422 
423  private:
424   const bool is_fatal_;
425 
426   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
427 };
428 
429 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
430  public:
431   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
432       : SlowPathCode(instruction) {}
433 
434   void EmitNativeCode(CodeGenerator* codegen) override {
435     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
436     __ Bind(GetEntryLabel());
437     LocationSummary* locations = instruction_->GetLocations();
438     SaveLiveRegisters(codegen, locations);
439     InvokeRuntimeCallingConvention calling_convention;
440     x86_64_codegen->Load32BitValue(
441         CpuRegister(calling_convention.GetRegisterAt(0)),
442         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
443     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
444     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
445   }
446 
447   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
448 
449  private:
450   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
451 };
452 
453 class ArraySetSlowPathX86_64 : public SlowPathCode {
454  public:
455   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
456 
457   void EmitNativeCode(CodeGenerator* codegen) override {
458     LocationSummary* locations = instruction_->GetLocations();
459     __ Bind(GetEntryLabel());
460     SaveLiveRegisters(codegen, locations);
461 
462     InvokeRuntimeCallingConvention calling_convention;
463     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
464     parallel_move.AddMove(
465         locations->InAt(0),
466         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
467         DataType::Type::kReference,
468         nullptr);
469     parallel_move.AddMove(
470         locations->InAt(1),
471         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
472         DataType::Type::kInt32,
473         nullptr);
474     parallel_move.AddMove(
475         locations->InAt(2),
476         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
477         DataType::Type::kReference,
478         nullptr);
479     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
480 
481     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
482     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
483     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
484     RestoreLiveRegisters(codegen, locations);
485     __ jmp(GetExitLabel());
486   }
487 
488   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
489 
490  private:
491   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
492 };
493 
494 // Slow path marking an object reference `ref` during a read
495 // barrier. The field `obj.field` in the object `obj` holding this
496 // reference does not get updated by this slow path after marking (see
497 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
498 //
499 // This means that after the execution of this slow path, `ref` will
500 // always be up-to-date, but `obj.field` may not; i.e., after the
501 // flip, `ref` will be a to-space reference, but `obj.field` will
502 // probably still be a from-space reference (unless it gets updated by
503 // another thread, or if another thread installed another object
504 // reference (different from `ref`) in `obj.field`).
505 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
506  public:
507   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
508                                 Location ref,
509                                 bool unpoison_ref_before_marking)
510       : SlowPathCode(instruction),
511         ref_(ref),
512         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
513     DCHECK(kEmitCompilerReadBarrier);
514   }
515 
516   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
517 
518   void EmitNativeCode(CodeGenerator* codegen) override {
519     LocationSummary* locations = instruction_->GetLocations();
520     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
521     Register ref_reg = ref_cpu_reg.AsRegister();
522     DCHECK(locations->CanCall());
523     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
524     DCHECK(instruction_->IsInstanceFieldGet() ||
525            instruction_->IsPredicatedInstanceFieldGet() ||
526            instruction_->IsStaticFieldGet() ||
527            instruction_->IsArrayGet() ||
528            instruction_->IsArraySet() ||
529            instruction_->IsLoadClass() ||
530            instruction_->IsLoadString() ||
531            instruction_->IsInstanceOf() ||
532            instruction_->IsCheckCast() ||
533            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
534         << "Unexpected instruction in read barrier marking slow path: "
535         << instruction_->DebugName();
536 
537     __ Bind(GetEntryLabel());
538     if (unpoison_ref_before_marking_) {
539       // Object* ref = ref_addr->AsMirrorPtr()
540       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
541     }
542     // No need to save live registers; it's taken care of by the
543     // entrypoint. Also, there is no need to update the stack mask,
544     // as this runtime call will not trigger a garbage collection.
545     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
546     DCHECK_NE(ref_reg, RSP);
547     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
548     // "Compact" slow path, saving two moves.
549     //
550     // Instead of using the standard runtime calling convention (input
551     // and output in R0):
552     //
553     //   RDI <- ref
554     //   RAX <- ReadBarrierMark(RDI)
555     //   ref <- RAX
556     //
557     // we just use rX (the register containing `ref`) as input and output
558     // of a dedicated entrypoint:
559     //
560     //   rX <- ReadBarrierMarkRegX(rX)
561     //
562     int32_t entry_point_offset =
563         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
564     // This runtime call does not require a stack map.
565     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
566     __ jmp(GetExitLabel());
567   }
568 
569  private:
570   // The location (register) of the marked object reference.
571   const Location ref_;
572   // Should the reference in `ref_` be unpoisoned prior to marking it?
573   const bool unpoison_ref_before_marking_;
574 
575   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
576 };
577 
578 // Slow path marking an object reference `ref` during a read barrier,
579 // and if needed, atomically updating the field `obj.field` in the
580 // object `obj` holding this reference after marking (contrary to
581 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
582 // `obj.field`).
583 //
584 // This means that after the execution of this slow path, both `ref`
585 // and `obj.field` will be up-to-date; i.e., after the flip, both will
586 // hold the same to-space reference (unless another thread installed
587 // another object reference (different from `ref`) in `obj.field`).
588 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
589  public:
590   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
591                                               Location ref,
592                                               CpuRegister obj,
593                                               const Address& field_addr,
594                                               bool unpoison_ref_before_marking,
595                                               CpuRegister temp1,
596                                               CpuRegister temp2)
597       : SlowPathCode(instruction),
598         ref_(ref),
599         obj_(obj),
600         field_addr_(field_addr),
601         unpoison_ref_before_marking_(unpoison_ref_before_marking),
602         temp1_(temp1),
603         temp2_(temp2) {
604     DCHECK(kEmitCompilerReadBarrier);
605   }
606 
607   const char* GetDescription() const override {
608     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
609   }
610 
611   void EmitNativeCode(CodeGenerator* codegen) override {
612     LocationSummary* locations = instruction_->GetLocations();
613     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
614     Register ref_reg = ref_cpu_reg.AsRegister();
615     DCHECK(locations->CanCall());
616     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
617     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
618         << "Unexpected instruction in read barrier marking and field updating slow path: "
619         << instruction_->DebugName();
620     HInvoke* invoke = instruction_->AsInvoke();
621     DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
622 
623     __ Bind(GetEntryLabel());
624     if (unpoison_ref_before_marking_) {
625       // Object* ref = ref_addr->AsMirrorPtr()
626       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
627     }
628 
629     // Save the old (unpoisoned) reference.
630     __ movl(temp1_, ref_cpu_reg);
631 
632     // No need to save live registers; it's taken care of by the
633     // entrypoint. Also, there is no need to update the stack mask,
634     // as this runtime call will not trigger a garbage collection.
635     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
636     DCHECK_NE(ref_reg, RSP);
637     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
638     // "Compact" slow path, saving two moves.
639     //
640     // Instead of using the standard runtime calling convention (input
641     // and output in R0):
642     //
643     //   RDI <- ref
644     //   RAX <- ReadBarrierMark(RDI)
645     //   ref <- RAX
646     //
647     // we just use rX (the register containing `ref`) as input and output
648     // of a dedicated entrypoint:
649     //
650     //   rX <- ReadBarrierMarkRegX(rX)
651     //
652     int32_t entry_point_offset =
653         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
654     // This runtime call does not require a stack map.
655     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
656 
657     // If the new reference is different from the old reference,
658     // update the field in the holder (`*field_addr`).
659     //
660     // Note that this field could also hold a different object, if
661     // another thread had concurrently changed it. In that case, the
662     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
663     // operation below would abort the CAS, leaving the field as-is.
664     NearLabel done;
665     __ cmpl(temp1_, ref_cpu_reg);
666     __ j(kEqual, &done);
667 
668     // Update the holder's field atomically.  This may fail if the
669     // mutator updates it before us, but that is OK.  This is achieved
670     // using a strong compare-and-set (CAS) operation with relaxed
671     // memory synchronization ordering, where the expected value is
672     // the old reference and the desired value is the new reference.
673     // This operation is implemented with a 32-bit LOCK CMPXCHGL
674     // instruction, which requires the expected value (the old
675     // reference) to be in EAX.  Save RAX beforehand, and move the
676     // expected value (stored in `temp1_`) into EAX.
677     __ movq(temp2_, CpuRegister(RAX));
678     __ movl(CpuRegister(RAX), temp1_);
679 
680     // Convenience aliases.
681     CpuRegister base = obj_;
682     CpuRegister expected = CpuRegister(RAX);
683     CpuRegister value = ref_cpu_reg;
684 
685     bool base_equals_value = (base.AsRegister() == value.AsRegister());
686     Register value_reg = ref_reg;
687     if (kPoisonHeapReferences) {
688       if (base_equals_value) {
689         // If `base` and `value` are the same register location, move
690         // `value_reg` to a temporary register.  This way, poisoning
691         // `value_reg` won't invalidate `base`.
692         value_reg = temp1_.AsRegister();
693         __ movl(CpuRegister(value_reg), base);
694       }
695 
696       // Check that the register allocator did not assign the location
697       // of `expected` (RAX) to `value` nor to `base`, so that heap
698       // poisoning (when enabled) works as intended below.
699       // - If `value` were equal to `expected`, both references would
700       //   be poisoned twice, meaning they would not be poisoned at
701       //   all, as heap poisoning uses address negation.
702       // - If `base` were equal to `expected`, poisoning `expected`
703       //   would invalidate `base`.
704       DCHECK_NE(value_reg, expected.AsRegister());
705       DCHECK_NE(base.AsRegister(), expected.AsRegister());
706 
707       __ PoisonHeapReference(expected);
708       __ PoisonHeapReference(CpuRegister(value_reg));
709     }
710 
711     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
712 
713     // If heap poisoning is enabled, we need to unpoison the values
714     // that were poisoned earlier.
715     if (kPoisonHeapReferences) {
716       if (base_equals_value) {
717         // `value_reg` has been moved to a temporary register, no need
718         // to unpoison it.
719       } else {
720         __ UnpoisonHeapReference(CpuRegister(value_reg));
721       }
722       // No need to unpoison `expected` (RAX), as it will be overwritten below.
723     }
724 
725     // Restore RAX.
726     __ movq(CpuRegister(RAX), temp2_);
727 
728     __ Bind(&done);
729     __ jmp(GetExitLabel());
730   }
731 
732  private:
733   // The location (register) of the marked object reference.
734   const Location ref_;
735   // The register containing the object holding the marked object reference field.
736   const CpuRegister obj_;
737   // The address of the marked reference field.  The base of this address must be `obj_`.
738   const Address field_addr_;
739 
740   // Should the reference in `ref_` be unpoisoned prior to marking it?
741   const bool unpoison_ref_before_marking_;
742 
743   const CpuRegister temp1_;
744   const CpuRegister temp2_;
745 
746   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
747 };
748 
749 // Slow path generating a read barrier for a heap reference.
750 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
751  public:
752   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
753                                             Location out,
754                                             Location ref,
755                                             Location obj,
756                                             uint32_t offset,
757                                             Location index)
758       : SlowPathCode(instruction),
759         out_(out),
760         ref_(ref),
761         obj_(obj),
762         offset_(offset),
763         index_(index) {
764     DCHECK(kEmitCompilerReadBarrier);
765     // If `obj` is equal to `out` or `ref`, it means the initial
766     // object has been overwritten by (or after) the heap object
767     // reference load to be instrumented, e.g.:
768     //
769     //   __ movl(out, Address(out, offset));
770     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
771     //
772     // In that case, we have lost the information about the original
773     // object, and the emitted read barrier cannot work properly.
774     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
775     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
776   }
777 
778   void EmitNativeCode(CodeGenerator* codegen) override {
779     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
780     LocationSummary* locations = instruction_->GetLocations();
781     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
782     DCHECK(locations->CanCall());
783     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
784     DCHECK(instruction_->IsInstanceFieldGet() ||
785            instruction_->IsPredicatedInstanceFieldGet() ||
786            instruction_->IsStaticFieldGet() ||
787            instruction_->IsArrayGet() ||
788            instruction_->IsInstanceOf() ||
789            instruction_->IsCheckCast() ||
790            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
791         << "Unexpected instruction in read barrier for heap reference slow path: "
792         << instruction_->DebugName();
793 
794     __ Bind(GetEntryLabel());
795     SaveLiveRegisters(codegen, locations);
796 
797     // We may have to change the index's value, but as `index_` is a
798     // constant member (like other "inputs" of this slow path),
799     // introduce a copy of it, `index`.
800     Location index = index_;
801     if (index_.IsValid()) {
802       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
803       if (instruction_->IsArrayGet()) {
804         // Compute real offset and store it in index_.
805         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
806         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
807         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
808           // We are about to change the value of `index_reg` (see the
809           // calls to art::x86_64::X86_64Assembler::shll and
810           // art::x86_64::X86_64Assembler::AddImmediate below), but it
811           // has not been saved by the previous call to
812           // art::SlowPathCode::SaveLiveRegisters, as it is a
813           // callee-save register --
814           // art::SlowPathCode::SaveLiveRegisters does not consider
815           // callee-save registers, as it has been designed with the
816           // assumption that callee-save registers are supposed to be
817           // handled by the called function.  So, as a callee-save
818           // register, `index_reg` _would_ eventually be saved onto
819           // the stack, but it would be too late: we would have
820           // changed its value earlier.  Therefore, we manually save
821           // it here into another freely available register,
822           // `free_reg`, chosen of course among the caller-save
823           // registers (as a callee-save `free_reg` register would
824           // exhibit the same problem).
825           //
826           // Note we could have requested a temporary register from
827           // the register allocator instead; but we prefer not to, as
828           // this is a slow path, and we know we can find a
829           // caller-save register that is available.
830           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
831           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
832           index_reg = free_reg;
833           index = Location::RegisterLocation(index_reg);
834         } else {
835           // The initial register stored in `index_` has already been
836           // saved in the call to art::SlowPathCode::SaveLiveRegisters
837           // (as it is not a callee-save register), so we can freely
838           // use it.
839         }
840         // Shifting the index value contained in `index_reg` by the
841         // scale factor (2) cannot overflow in practice, as the
842         // runtime is unable to allocate object arrays with a size
843         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
844         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
845         static_assert(
846             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
847             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
848         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
849       } else {
850         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
851         // intrinsics, `index_` is not shifted by a scale factor of 2
852         // (as in the case of ArrayGet), as it is actually an offset
853         // to an object field within an object.
854         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
855         DCHECK(instruction_->GetLocations()->Intrinsified());
856         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
857                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
858                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
859                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
860                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
861             << instruction_->AsInvoke()->GetIntrinsic();
862         DCHECK_EQ(offset_, 0U);
863         DCHECK(index_.IsRegister());
864       }
865     }
866 
867     // We're moving two or three locations to locations that could
868     // overlap, so we need a parallel move resolver.
869     InvokeRuntimeCallingConvention calling_convention;
870     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
871     parallel_move.AddMove(ref_,
872                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
873                           DataType::Type::kReference,
874                           nullptr);
875     parallel_move.AddMove(obj_,
876                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
877                           DataType::Type::kReference,
878                           nullptr);
879     if (index.IsValid()) {
880       parallel_move.AddMove(index,
881                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
882                             DataType::Type::kInt32,
883                             nullptr);
884       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
885     } else {
886       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
887       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
888     }
889     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
890                                   instruction_,
891                                   instruction_->GetDexPc(),
892                                   this);
893     CheckEntrypointTypes<
894         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
895     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
896 
897     RestoreLiveRegisters(codegen, locations);
898     __ jmp(GetExitLabel());
899   }
900 
901   const char* GetDescription() const override {
902     return "ReadBarrierForHeapReferenceSlowPathX86_64";
903   }
904 
905  private:
906   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
907     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
908     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
909     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
910       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
911         return static_cast<CpuRegister>(i);
912       }
913     }
914     // We shall never fail to find a free caller-save register, as
915     // there are more than two core caller-save registers on x86-64
916     // (meaning it is possible to find one which is different from
917     // `ref` and `obj`).
918     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
919     LOG(FATAL) << "Could not find a free caller-save register";
920     UNREACHABLE();
921   }
922 
923   const Location out_;
924   const Location ref_;
925   const Location obj_;
926   const uint32_t offset_;
927   // An additional location containing an index to an array.
928   // Only used for HArrayGet and the UnsafeGetObject &
929   // UnsafeGetObjectVolatile intrinsics.
930   const Location index_;
931 
932   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
933 };
934 
935 // Slow path generating a read barrier for a GC root.
936 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
937  public:
938   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
939       : SlowPathCode(instruction), out_(out), root_(root) {
940     DCHECK(kEmitCompilerReadBarrier);
941   }
942 
943   void EmitNativeCode(CodeGenerator* codegen) override {
944     LocationSummary* locations = instruction_->GetLocations();
945     DCHECK(locations->CanCall());
946     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
947     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
948         << "Unexpected instruction in read barrier for GC root slow path: "
949         << instruction_->DebugName();
950 
951     __ Bind(GetEntryLabel());
952     SaveLiveRegisters(codegen, locations);
953 
954     InvokeRuntimeCallingConvention calling_convention;
955     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
956     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
957     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
958                                   instruction_,
959                                   instruction_->GetDexPc(),
960                                   this);
961     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
962     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
963 
964     RestoreLiveRegisters(codegen, locations);
965     __ jmp(GetExitLabel());
966   }
967 
968   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
969 
970  private:
971   const Location out_;
972   const Location root_;
973 
974   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
975 };
976 
977 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
978  public:
979   explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
980       : SlowPathCode(instruction) {}
981 
982   void EmitNativeCode(CodeGenerator* codegen) override {
983     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
984     LocationSummary* locations = instruction_->GetLocations();
985     QuickEntrypointEnum entry_point =
986         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
987     __ Bind(GetEntryLabel());
988     SaveLiveRegisters(codegen, locations);
989     x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
990     RestoreLiveRegisters(codegen, locations);
991     __ jmp(GetExitLabel());
992   }
993 
994   const char* GetDescription() const override {
995     return "MethodEntryExitHooksSlowPath";
996   }
997 
998  private:
999   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1000 };
1001 
1002 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1003  public:
1004   CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {}
1005 
1006   void EmitNativeCode(CodeGenerator* codegen) override {
1007     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1008     __ Bind(GetEntryLabel());
1009     x86_64_codegen->GenerateInvokeRuntime(
1010         GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1011     __ jmp(GetExitLabel());
1012   }
1013 
1014   const char* GetDescription() const override {
1015     return "CompileOptimizedSlowPath";
1016   }
1017 
1018  private:
1019   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1020 };
1021 
1022 #undef __
1023 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1024 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
1025 
1026 inline Condition X86_64IntegerCondition(IfCondition cond) {
1027   switch (cond) {
1028     case kCondEQ: return kEqual;
1029     case kCondNE: return kNotEqual;
1030     case kCondLT: return kLess;
1031     case kCondLE: return kLessEqual;
1032     case kCondGT: return kGreater;
1033     case kCondGE: return kGreaterEqual;
1034     case kCondB:  return kBelow;
1035     case kCondBE: return kBelowEqual;
1036     case kCondA:  return kAbove;
1037     case kCondAE: return kAboveEqual;
1038   }
1039   LOG(FATAL) << "Unreachable";
1040   UNREACHABLE();
1041 }
1042 
1043 // Maps FP condition to x86_64 name.
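// The unsigned below/above variants are used because x86-64 FP compares (UCOMISS/UCOMISD)
// set CF/ZF the way an unsigned integer comparison would.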
1044 inline Condition X86_64FPCondition(IfCondition cond) {
1045   switch (cond) {
1046     case kCondEQ: return kEqual;
1047     case kCondNE: return kNotEqual;
1048     case kCondLT: return kBelow;
1049     case kCondLE: return kBelowEqual;
1050     case kCondGT: return kAbove;
1051     case kCondGE: return kAboveEqual;
1052     default:      break;  // should not happen
1053   }
1054   LOG(FATAL) << "Unreachable";
1055   UNREACHABLE();
1056 }
1057 
1058 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1059   // We have to ensure that the native code we call directly (such as @CriticalNative
1060   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1061   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
1062   // that they are saved in the prologue and properly restored.
1063   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1064     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1065   }
1066 }
1067 
1068 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1069       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1070       ArtMethod* method ATTRIBUTE_UNUSED) {
1071   return desired_dispatch_info;
1072 }
1073 
1074 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1075   switch (load_kind) {
1076     case MethodLoadKind::kBootImageLinkTimePcRelative:
1077       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1078       __ leal(temp.AsRegister<CpuRegister>(),
1079               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1080       RecordBootImageMethodPatch(invoke);
1081       break;
1082     case MethodLoadKind::kBootImageRelRo: {
1083       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1084       __ movl(temp.AsRegister<CpuRegister>(),
1085               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1086       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1087       break;
1088     }
1089     case MethodLoadKind::kBssEntry: {
1090       __ movq(temp.AsRegister<CpuRegister>(),
1091               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1092       RecordMethodBssEntryPatch(invoke);
1093       // No need for memory fence, thanks to the x86-64 memory model.
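      // (On x86-64, ordinary loads already provide acquire-like ordering under the TSO memory
      // model, so the entry published in the .bss is observed without an explicit fence.)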
1094       break;
1095     }
1096     case MethodLoadKind::kJitDirectAddress: {
1097       Load64BitValue(temp.AsRegister<CpuRegister>(),
1098                      reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1099       break;
1100     }
1101     case MethodLoadKind::kRuntimeCall: {
1102       // Test situation, don't do anything.
1103       break;
1104     }
1105     default: {
1106       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1107       UNREACHABLE();
1108     }
1109   }
1110 }
1111 
1112 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1113     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1114   // All registers are assumed to be correctly set up.
1115 
1116   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1117   switch (invoke->GetMethodLoadKind()) {
1118     case MethodLoadKind::kStringInit: {
1119       // temp = thread->string_init_entrypoint
1120       uint32_t offset =
1121           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1122       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1123       break;
1124     }
1125     case MethodLoadKind::kRecursive: {
1126       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1127       break;
1128     }
1129     case MethodLoadKind::kRuntimeCall: {
1130       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1131       return;  // No code pointer retrieval; the runtime performs the call directly.
1132     }
1133     case MethodLoadKind::kBootImageLinkTimePcRelative:
1134       // For kCallCriticalNative we skip loading the method and do the call directly.
1135       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1136         break;
1137       }
1138       FALLTHROUGH_INTENDED;
1139     default: {
1140       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1141       break;
1142     }
1143   }
1144 
1145   switch (invoke->GetCodePtrLocation()) {
1146     case CodePtrLocation::kCallSelf:
1147       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1148       __ call(&frame_entry_label_);
1149       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1150       break;
1151     case CodePtrLocation::kCallCriticalNative: {
1152       size_t out_frame_size =
1153           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1154                                     kNativeStackAlignment,
1155                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1156       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1157         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1158         __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1159         RecordBootImageJniEntrypointPatch(invoke);
1160       } else {
1161         // (callee_method + offset_of_jni_entry_point)()
1162         __ call(Address(callee_method.AsRegister<CpuRegister>(),
1163                          ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1164       }
1165       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1166       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
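      // (The native ABI may leave the upper bits of a narrow bool/int8/int16 return value
      // unspecified, while managed code expects the value zero-/sign-extended in EAX, hence
      // the explicit movzx/movsx below.)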
1167       switch (invoke->GetType()) {
1168         case DataType::Type::kBool:
1169           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1170           break;
1171         case DataType::Type::kInt8:
1172           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1173           break;
1174         case DataType::Type::kUint16:
1175           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1176           break;
1177         case DataType::Type::kInt16:
1178           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1179           break;
1180         case DataType::Type::kInt32:
1181         case DataType::Type::kInt64:
1182         case DataType::Type::kFloat32:
1183         case DataType::Type::kFloat64:
1184         case DataType::Type::kVoid:
1185           break;
1186         default:
1187           DCHECK(false) << invoke->GetType();
1188           break;
1189       }
1190       if (out_frame_size != 0u) {
1191         DecreaseFrame(out_frame_size);
1192       }
1193       break;
1194     }
1195     case CodePtrLocation::kCallArtMethod:
1196       // (callee_method + offset_of_quick_compiled_code)()
1197       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1198                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1199                           kX86_64PointerSize).SizeValue()));
1200       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1201       break;
1202   }
1203 
1204   DCHECK(!IsLeafMethod());
1205 }
1206 
1207 void CodeGeneratorX86_64::GenerateVirtualCall(
1208     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1209   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1210   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1211       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1212 
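  // The emitted dispatch is effectively (illustrative, Intel-style pseudo-assembly):
  //   movl temp, [receiver + class_offset]   // load receiver->klass_
  //   movq temp, [temp + method_offset]      // load the ArtMethod* from the embedded vtable
  //   call [temp + entry_point_offset]       // jump to its quick-compiled entry point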
1213   // Use the calling convention instead of the location of the receiver, as
1214   // intrinsics may have put the receiver in a different register. In the intrinsics
1215   // slow path, the arguments have been moved to the right place, so here we are
1216   // guaranteed that the receiver is the first register of the calling convention.
1217   InvokeDexCallingConvention calling_convention;
1218   Register receiver = calling_convention.GetRegisterAt(0);
1219 
1220   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1221   // /* HeapReference<Class> */ temp = receiver->klass_
1222   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1223   MaybeRecordImplicitNullCheck(invoke);
1224   // Instead of simply (possibly) unpoisoning `temp` here, we should
1225   // emit a read barrier for the previous class reference load.
1226   // However, this is not required in practice, as this is an
1227   // intermediate/temporary reference and because the current
1228   // concurrent copying collector keeps the from-space memory
1229   // intact/accessible until the end of the marking phase (a future
1230   // collector may not).
1231   __ MaybeUnpoisonHeapReference(temp);
1232 
1233   MaybeGenerateInlineCacheCheck(invoke, temp);
1234 
1235   // temp = temp->GetMethodAt(method_offset);
1236   __ movq(temp, Address(temp, method_offset));
1237   // call temp->GetEntryPoint();
1238   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1239       kX86_64PointerSize).SizeValue()));
1240   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1241 }
1242 
1243 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1244   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1245   __ Bind(&boot_image_other_patches_.back().label);
1246 }
1247 
1248 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1249   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1250   __ Bind(&boot_image_other_patches_.back().label);
1251 }
1252 
1253 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1254   boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1255                                           invoke->GetResolvedMethodReference().index);
1256   __ Bind(&boot_image_method_patches_.back().label);
1257 }
1258 
1259 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1260   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1261          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1262          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1263                          invoke->GetMethodReference().dex_file));
1264   method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1265                                          invoke->GetMethodReference().index);
1266   __ Bind(&method_bss_entry_patches_.back().label);
1267 }
1268 
1269 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1270                                                    dex::TypeIndex type_index) {
1271   boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1272   __ Bind(&boot_image_type_patches_.back().label);
1273 }
1274 
1275 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1276   ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1277   switch (load_class->GetLoadKind()) {
1278     case HLoadClass::LoadKind::kBssEntry:
1279       patches = &type_bss_entry_patches_;
1280       break;
1281     case HLoadClass::LoadKind::kBssEntryPublic:
1282       patches = &public_type_bss_entry_patches_;
1283       break;
1284     case HLoadClass::LoadKind::kBssEntryPackage:
1285       patches = &package_type_bss_entry_patches_;
1286       break;
1287     default:
1288       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1289       UNREACHABLE();
1290   }
1291   patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1292   return &patches->back().label;
1293 }
1294 
1295 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1296   boot_image_string_patches_.emplace_back(
1297       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1298   __ Bind(&boot_image_string_patches_.back().label);
1299 }
1300 
1301 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1302   string_bss_entry_patches_.emplace_back(
1303       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1304   return &string_bss_entry_patches_.back().label;
1305 }
1306 
1307 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1308   boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1309                                                   invoke->GetResolvedMethodReference().index);
1310   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1311 }
1312 
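// LoadBootImageAddress (below) materializes a boot image reference in one of three ways:
// a PC-relative leal patched at link time (when compiling the boot image itself), a movl
// load of the relocated address recorded as a DataBimgRelRoPatch (PIC AOT code), or, for
// JIT-compiled code, an absolute 32-bit address computed from the loaded boot image space.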
1313 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1314   if (GetCompilerOptions().IsBootImage()) {
1315     __ leal(reg,
1316             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1317     RecordBootImageIntrinsicPatch(boot_image_reference);
1318   } else if (GetCompilerOptions().GetCompilePic()) {
1319     __ movl(reg,
1320             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1321     RecordBootImageRelRoPatch(boot_image_reference);
1322   } else {
1323     DCHECK(GetCompilerOptions().IsJitCompiler());
1324     gc::Heap* heap = Runtime::Current()->GetHeap();
1325     DCHECK(!heap->GetBootImageSpaces().empty());
1326     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1327     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1328   }
1329 }
1330 
1331 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1332   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1333   if (GetCompilerOptions().IsBootImage()) {
1334     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1335     __ leal(reg,
1336             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1337     MethodReference target_method = invoke->GetResolvedMethodReference();
1338     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1339     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1340     __ Bind(&boot_image_type_patches_.back().label);
1341   } else {
1342     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1343     LoadBootImageAddress(reg, boot_image_offset);
1344   }
1345 }
1346 
1347 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1348   if (GetCompilerOptions().IsBootImage()) {
1349     ScopedObjectAccess soa(Thread::Current());
1350     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1351     boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1352     __ Bind(&boot_image_type_patches_.back().label);
1353   } else {
1354     uint32_t boot_image_offset = GetBootImageOffset(class_root);
1355     LoadBootImageAddress(reg, boot_image_offset);
1356   }
1357 }
1358 
1359 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
1360 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
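// For example, a PC-relative "movl reg, [rip + placeholder]" ends with a 32-bit displacement,
// so the label bound right after the instruction sits 4 bytes past the value to be patched.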
1361 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1362 
1363 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1364 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1365     const ArenaDeque<PatchInfo<Label>>& infos,
1366     ArenaVector<linker::LinkerPatch>* linker_patches) {
1367   for (const PatchInfo<Label>& info : infos) {
1368     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1369     linker_patches->push_back(
1370         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1371   }
1372 }
1373 
1374 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1375 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1376                                      const DexFile* target_dex_file,
1377                                      uint32_t pc_insn_offset,
1378                                      uint32_t boot_image_offset) {
1379   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1380   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1381 }
1382 
1383 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1384   DCHECK(linker_patches->empty());
1385   size_t size =
1386       boot_image_method_patches_.size() +
1387       method_bss_entry_patches_.size() +
1388       boot_image_type_patches_.size() +
1389       type_bss_entry_patches_.size() +
1390       public_type_bss_entry_patches_.size() +
1391       package_type_bss_entry_patches_.size() +
1392       boot_image_string_patches_.size() +
1393       string_bss_entry_patches_.size() +
1394       boot_image_jni_entrypoint_patches_.size() +
1395       boot_image_other_patches_.size();
1396   linker_patches->reserve(size);
1397   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1398     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1399         boot_image_method_patches_, linker_patches);
1400     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1401         boot_image_type_patches_, linker_patches);
1402     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1403         boot_image_string_patches_, linker_patches);
1404   } else {
1405     DCHECK(boot_image_method_patches_.empty());
1406     DCHECK(boot_image_type_patches_.empty());
1407     DCHECK(boot_image_string_patches_.empty());
1408   }
1409   if (GetCompilerOptions().IsBootImage()) {
1410     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1411         boot_image_other_patches_, linker_patches);
1412   } else {
1413     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1414         boot_image_other_patches_, linker_patches);
1415   }
1416   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1417       method_bss_entry_patches_, linker_patches);
1418   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1419       type_bss_entry_patches_, linker_patches);
1420   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1421       public_type_bss_entry_patches_, linker_patches);
1422   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1423       package_type_bss_entry_patches_, linker_patches);
1424   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1425       string_bss_entry_patches_, linker_patches);
1426   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1427       boot_image_jni_entrypoint_patches_, linker_patches);
1428   DCHECK_EQ(size, linker_patches->size());
1429 }
1430 
1431 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1432   stream << Register(reg);
1433 }
1434 
1435 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1436   stream << FloatRegister(reg);
1437 }
1438 
1439 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1440   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1441 }
1442 
1443 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1444   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1445   return kX86_64WordSize;
1446 }
1447 
1448 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1449   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1450   return kX86_64WordSize;
1451 }
1452 
1453 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1454   if (GetGraph()->HasSIMD()) {
1455     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1456   } else {
1457     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1458   }
1459   return GetSlowPathFPWidth();
1460 }
1461 
1462 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1463   if (GetGraph()->HasSIMD()) {
1464     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1465   } else {
1466     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1467   }
1468   return GetSlowPathFPWidth();
1469 }
1470 
1471 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1472                                         HInstruction* instruction,
1473                                         uint32_t dex_pc,
1474                                         SlowPathCode* slow_path) {
1475   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1476   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1477   if (EntrypointRequiresStackMap(entrypoint)) {
1478     RecordPcInfo(instruction, dex_pc, slow_path);
1479   }
1480 }
1481 
1482 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1483                                                               HInstruction* instruction,
1484                                                               SlowPathCode* slow_path) {
1485   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1486   GenerateInvokeRuntime(entry_point_offset);
1487 }
1488 
1489 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1490   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1491 }
1492 
1493 static constexpr int kNumberOfCpuRegisterPairs = 0;
1494 // Use a fake return address register to mimic Quick.
1495 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1496 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1497                                          const CompilerOptions& compiler_options,
1498                                          OptimizingCompilerStats* stats)
1499     : CodeGenerator(graph,
1500                     kNumberOfCpuRegisters,
1501                     kNumberOfFloatRegisters,
1502                     kNumberOfCpuRegisterPairs,
1503                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1504                                         arraysize(kCoreCalleeSaves))
1505                         | (1 << kFakeReturnRegister),
1506                     ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1507                                         arraysize(kFpuCalleeSaves)),
1508                     compiler_options,
1509                     stats),
1510       block_labels_(nullptr),
1511       location_builder_(graph, this),
1512       instruction_visitor_(graph, this),
1513       move_resolver_(graph->GetAllocator(), this),
1514       assembler_(graph->GetAllocator(),
1515                  compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1516       constant_area_start_(0),
1517       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1518       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1519       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1520       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1521       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1522       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1523       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1524       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1525       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1526       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1527       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1528       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1529       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1530   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1531 }
1532 
1533 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1534                                                                CodeGeneratorX86_64* codegen)
1535       : InstructionCodeGenerator(graph, codegen),
1536         assembler_(codegen->GetAssembler()),
1537         codegen_(codegen) {}
1538 
1539 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1540   // Stack register is always reserved.
1541   blocked_core_registers_[RSP] = true;
1542 
1543   // Block the register used as TMP.
1544   blocked_core_registers_[TMP] = true;
1545 }
1546 
1547 static dwarf::Reg DWARFReg(Register reg) {
1548   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1549 }
1550 
1551 static dwarf::Reg DWARFReg(FloatRegister reg) {
1552   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1553 }
1554 
1555 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1556   new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1557 }
1558 
1559 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1560   SlowPathCode* slow_path =
1561       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1562   codegen_->AddSlowPath(slow_path);
1563 
1564   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1565   int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
1566   __ movq(CpuRegister(TMP), Immediate(address + offset));
1567   __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
1568   __ j(kNotEqual, slow_path->GetEntryLabel());
1569   __ Bind(slow_path->GetExitLabel());
1570 }
1571 
1572 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1573   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1574   DCHECK(codegen_->RequiresCurrentMethod());
1575   GenerateMethodEntryExitHook(instruction);
1576 }
1577 
1578 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1579   switch (instr->InputAt(0)->GetType()) {
1580     case DataType::Type::kReference:
1581     case DataType::Type::kBool:
1582     case DataType::Type::kUint8:
1583     case DataType::Type::kInt8:
1584     case DataType::Type::kUint16:
1585     case DataType::Type::kInt16:
1586     case DataType::Type::kInt32:
1587     case DataType::Type::kInt64:
1588       locations->SetInAt(0, Location::RegisterLocation(RAX));
1589       break;
1590 
1591     case DataType::Type::kFloat32:
1592     case DataType::Type::kFloat64:
1593       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1594       break;
1595 
1596     case DataType::Type::kVoid:
1597       locations->SetInAt(0, Location::NoLocation());
1598       break;
1599 
1600     default:
1601       LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1602   }
1603 }
1604 
1605 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1606   LocationSummary* locations = new (GetGraph()->GetAllocator())
1607       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1608   SetInForReturnValue(method_hook, locations);
1609 }
1610 
1611 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1612   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1613   DCHECK(codegen_->RequiresCurrentMethod());
1614   GenerateMethodEntryExitHook(instruction);
1615 }
1616 
1617 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1618   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1619     NearLabel overflow;
1620     Register method = kMethodRegisterArgument;
1621     if (!is_frame_entry) {
1622       CHECK(RequiresCurrentMethod());
1623       method = TMP;
1624       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1625     }
1626     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1627             Immediate(interpreter::kNterpHotnessValue));
1628     __ j(kEqual, &overflow);
1629     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1630             Immediate(-1));
1631     __ Bind(&overflow);
1632   }
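  // Note: the compare/skip above makes the counter saturate once it reaches
  // kNterpHotnessValue instead of wrapping around past the threshold.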
1633 
1634   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1635     SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64();
1636     AddSlowPath(slow_path);
1637     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1638     DCHECK(info != nullptr);
1639     CHECK(!HasEmptyFrame());
1640     uint64_t address = reinterpret_cast64<uint64_t>(info);
1641     // Note: if the address were in the 32-bit range, we could use
1642     // Address::Absolute and avoid this movq.
1643     __ movq(CpuRegister(TMP), Immediate(address));
1644     // With multiple threads, this can overflow; that is OK, as we will eventually
1645     // see the counter reach 0. Also, at this point we have no register available
1646     // to look at the counter directly.
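    // The decrement sets ZF exactly when the counter reaches zero, which is what the
    // j(kEqual) below keys on to enter the compile-optimized slow path.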
1647     __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1648             Immediate(-1));
1649     __ j(kEqual, slow_path->GetEntryLabel());
1650     __ Bind(slow_path->GetExitLabel());
1651   }
1652 }
1653 
1654 void CodeGeneratorX86_64::GenerateFrameEntry() {
1655   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1656   __ Bind(&frame_entry_label_);
1657   bool skip_overflow_check = IsLeafMethod()
1658       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1659   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1660 
1661 
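  // The probe below implements the implicit check: testq only reads the word at
  // [rsp - reserved_bytes], so touching the guard page faults without clobbering any
  // register, and the runtime's fault handler turns that fault into a StackOverflowError
  // (hence the RecordPcInfo for the stack map).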
1662   if (!skip_overflow_check) {
1663     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1664     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1665     RecordPcInfo(nullptr, 0);
1666   }
1667 
1668   if (!HasEmptyFrame()) {
1669     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1670       Register reg = kCoreCalleeSaves[i];
1671       if (allocated_registers_.ContainsCoreRegister(reg)) {
1672         __ pushq(CpuRegister(reg));
1673         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1674         __ cfi().RelOffset(DWARFReg(reg), 0);
1675       }
1676     }
1677 
1678     int adjust = GetFrameSize() - GetCoreSpillSize();
1679     IncreaseFrame(adjust);
1680     uint32_t xmm_spill_location = GetFpuSpillStart();
1681     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1682 
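    // Only the low 64 bits of the callee-save XMM registers are preserved here (movsd),
    // matching the slot width returned by GetCalleePreservedFPWidth().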
1683     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1684       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1685         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1686         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1687         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1688       }
1689     }
1690 
1691     // Save the current method if we need it. Note that we do not
1692     // do this in HCurrentMethod, as the instruction might have been removed
1693     // in the SSA graph.
1694     if (RequiresCurrentMethod()) {
1695       CHECK(!HasEmptyFrame());
1696       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1697               CpuRegister(kMethodRegisterArgument));
1698     }
1699 
1700     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1701       CHECK(!HasEmptyFrame());
1702       // Initialize should_deoptimize flag to 0.
1703       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1704     }
1705   }
1706 
1707   MaybeIncrementHotness(/* is_frame_entry= */ true);
1708 }
1709 
1710 void CodeGeneratorX86_64::GenerateFrameExit() {
1711   __ cfi().RememberState();
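  // The CFI state is remembered here and restored after the ret below, since more blocks
  // may be emitted after this exit while the frame described at entry is still live.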
1712   if (!HasEmptyFrame()) {
1713     uint32_t xmm_spill_location = GetFpuSpillStart();
1714     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1715     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1716       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1717         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1718         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1719         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1720       }
1721     }
1722 
1723     int adjust = GetFrameSize() - GetCoreSpillSize();
1724     DecreaseFrame(adjust);
1725 
1726     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1727       Register reg = kCoreCalleeSaves[i];
1728       if (allocated_registers_.ContainsCoreRegister(reg)) {
1729         __ popq(CpuRegister(reg));
1730         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1731         __ cfi().Restore(DWARFReg(reg));
1732       }
1733     }
1734   }
1735   __ ret();
1736   __ cfi().RestoreState();
1737   __ cfi().DefCFAOffset(GetFrameSize());
1738 }
1739 
1740 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1741   __ Bind(GetLabelOf(block));
1742 }
1743 
1744 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1745   if (source.Equals(destination)) {
1746     return;
1747   }
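  // x86-64 has no memory-to-memory moves, so the stack-to-stack cases below go through the
  // reserved TMP register; every other combination is a single mov of the appropriate width.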
1748   if (destination.IsRegister()) {
1749     CpuRegister dest = destination.AsRegister<CpuRegister>();
1750     if (source.IsRegister()) {
1751       __ movq(dest, source.AsRegister<CpuRegister>());
1752     } else if (source.IsFpuRegister()) {
1753       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1754     } else if (source.IsStackSlot()) {
1755       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1756     } else if (source.IsConstant()) {
1757       HConstant* constant = source.GetConstant();
1758       if (constant->IsLongConstant()) {
1759         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1760       } else if (constant->IsDoubleConstant()) {
1761         Load64BitValue(dest, GetInt64ValueOf(constant));
1762       } else {
1763         Load32BitValue(dest, GetInt32ValueOf(constant));
1764       }
1765     } else {
1766       DCHECK(source.IsDoubleStackSlot());
1767       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1768     }
1769   } else if (destination.IsFpuRegister()) {
1770     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1771     if (source.IsRegister()) {
1772       __ movd(dest, source.AsRegister<CpuRegister>());
1773     } else if (source.IsFpuRegister()) {
1774       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1775     } else if (source.IsConstant()) {
1776       HConstant* constant = source.GetConstant();
1777       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1778       if (constant->IsFloatConstant()) {
1779         Load32BitValue(dest, static_cast<int32_t>(value));
1780       } else {
1781         Load64BitValue(dest, value);
1782       }
1783     } else if (source.IsStackSlot()) {
1784       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1785     } else {
1786       DCHECK(source.IsDoubleStackSlot());
1787       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1788     }
1789   } else if (destination.IsStackSlot()) {
1790     if (source.IsRegister()) {
1791       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1792               source.AsRegister<CpuRegister>());
1793     } else if (source.IsFpuRegister()) {
1794       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1795                source.AsFpuRegister<XmmRegister>());
1796     } else if (source.IsConstant()) {
1797       HConstant* constant = source.GetConstant();
1798       int32_t value = GetInt32ValueOf(constant);
1799       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1800     } else {
1801       DCHECK(source.IsStackSlot()) << source;
1802       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1803       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1804     }
1805   } else {
1806     DCHECK(destination.IsDoubleStackSlot());
1807     if (source.IsRegister()) {
1808       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1809               source.AsRegister<CpuRegister>());
1810     } else if (source.IsFpuRegister()) {
1811       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1812                source.AsFpuRegister<XmmRegister>());
1813     } else if (source.IsConstant()) {
1814       HConstant* constant = source.GetConstant();
1815       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1816       int64_t value = GetInt64ValueOf(constant);
1817       Store64BitValueToStack(destination, value);
1818     } else {
1819       DCHECK(source.IsDoubleStackSlot());
1820       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1821       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1822     }
1823   }
1824 }
1825 
1826 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
1827                                                     Location dst,
1828                                                     Address src) {
1829   switch (type) {
1830     case DataType::Type::kBool:
1831     case DataType::Type::kUint8:
1832       __ movzxb(dst.AsRegister<CpuRegister>(), src);
1833       break;
1834     case DataType::Type::kInt8:
1835       __ movsxb(dst.AsRegister<CpuRegister>(), src);
1836       break;
1837     case DataType::Type::kUint16:
1838       __ movzxw(dst.AsRegister<CpuRegister>(), src);
1839       break;
1840     case DataType::Type::kInt16:
1841       __ movsxw(dst.AsRegister<CpuRegister>(), src);
1842       break;
1843     case DataType::Type::kInt32:
1844     case DataType::Type::kUint32:
1845       __ movl(dst.AsRegister<CpuRegister>(), src);
1846       break;
1847     case DataType::Type::kInt64:
1848     case DataType::Type::kUint64:
1849       __ movq(dst.AsRegister<CpuRegister>(), src);
1850       break;
1851     case DataType::Type::kFloat32:
1852       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1853       break;
1854     case DataType::Type::kFloat64:
1855       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1856       break;
1857     case DataType::Type::kVoid:
1858     case DataType::Type::kReference:
1859       LOG(FATAL) << "Unreachable type " << type;
1860       UNREACHABLE();
1861   }
1862 }
1863 
1864 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1865   DCHECK(location.IsRegister());
1866   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1867 }
1868 
1869 void CodeGeneratorX86_64::MoveLocation(
1870     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1871   Move(dst, src);
1872 }
1873 
1874 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1875   if (location.IsRegister()) {
1876     locations->AddTemp(location);
1877   } else {
1878     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1879   }
1880 }
1881 
1882 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1883   if (successor->IsExitBlock()) {
1884     DCHECK(got->GetPrevious()->AlwaysThrows());
1885     return;  // no code needed
1886   }
1887 
1888   HBasicBlock* block = got->GetBlock();
1889   HInstruction* previous = got->GetPrevious();
1890 
1891   HLoopInformation* info = block->GetLoopInformation();
1892   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1893     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1894     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1895     return;
1896   }
1897 
1898   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1899     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1900   }
1901   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1902     __ jmp(codegen_->GetLabelOf(successor));
1903   }
1904 }
1905 
1906 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1907   got->SetLocations(nullptr);
1908 }
1909 
1910 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1911   HandleGoto(got, got->GetSuccessor());
1912 }
1913 
1914 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1915   try_boundary->SetLocations(nullptr);
1916 }
1917 
1918 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1919   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1920   if (!successor->IsExitBlock()) {
1921     HandleGoto(try_boundary, successor);
1922   }
1923 }
1924 
1925 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1926   exit->SetLocations(nullptr);
1927 }
1928 
1929 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1930 }
1931 
1932 template<class LabelType>
1933 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1934                                                      LabelType* true_label,
1935                                                      LabelType* false_label) {
1936   if (cond->IsFPConditionTrueIfNaN()) {
1937     __ j(kUnordered, true_label);
1938   } else if (cond->IsFPConditionFalseIfNaN()) {
1939     __ j(kUnordered, false_label);
1940   }
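  // ucomiss/ucomisd report an unordered (NaN) comparison via the parity flag, which is what
  // the kUnordered jumps above key on; the ordinary condition is only tested afterwards.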
1941   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1942 }
1943 
1944 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1945   LocationSummary* locations = condition->GetLocations();
1946 
1947   Location left = locations->InAt(0);
1948   Location right = locations->InAt(1);
1949   DataType::Type type = condition->InputAt(0)->GetType();
1950   switch (type) {
1951     case DataType::Type::kBool:
1952     case DataType::Type::kUint8:
1953     case DataType::Type::kInt8:
1954     case DataType::Type::kUint16:
1955     case DataType::Type::kInt16:
1956     case DataType::Type::kInt32:
1957     case DataType::Type::kReference: {
1958       codegen_->GenerateIntCompare(left, right);
1959       break;
1960     }
1961     case DataType::Type::kInt64: {
1962       codegen_->GenerateLongCompare(left, right);
1963       break;
1964     }
1965     case DataType::Type::kFloat32: {
1966       if (right.IsFpuRegister()) {
1967         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1968       } else if (right.IsConstant()) {
1969         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1970                    codegen_->LiteralFloatAddress(
1971                      right.GetConstant()->AsFloatConstant()->GetValue()));
1972       } else {
1973         DCHECK(right.IsStackSlot());
1974         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1975                    Address(CpuRegister(RSP), right.GetStackIndex()));
1976       }
1977       break;
1978     }
1979     case DataType::Type::kFloat64: {
1980       if (right.IsFpuRegister()) {
1981         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1982       } else if (right.IsConstant()) {
1983         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1984                    codegen_->LiteralDoubleAddress(
1985                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1986       } else {
1987         DCHECK(right.IsDoubleStackSlot());
1988         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1989                    Address(CpuRegister(RSP), right.GetStackIndex()));
1990       }
1991       break;
1992     }
1993     default:
1994       LOG(FATAL) << "Unexpected condition type " << type;
1995   }
1996 }
1997 
1998 template<class LabelType>
1999 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2000                                                                   LabelType* true_target_in,
2001                                                                   LabelType* false_target_in) {
2002   // Generated branching requires both targets to be explicit. If either of the
2003   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2004   LabelType fallthrough_target;
2005   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2006   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2007 
2008   // Generate the comparison to set the CC.
2009   GenerateCompareTest(condition);
2010 
2011   // Now generate the correct jump(s).
2012   DataType::Type type = condition->InputAt(0)->GetType();
2013   switch (type) {
2014     case DataType::Type::kInt64: {
2015       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2016       break;
2017     }
2018     case DataType::Type::kFloat32: {
2019       GenerateFPJumps(condition, true_target, false_target);
2020       break;
2021     }
2022     case DataType::Type::kFloat64: {
2023       GenerateFPJumps(condition, true_target, false_target);
2024       break;
2025     }
2026     default:
2027       LOG(FATAL) << "Unexpected condition type " << type;
2028   }
2029 
2030   if (false_target != &fallthrough_target) {
2031     __ jmp(false_target);
2032   }
2033 
2034   if (fallthrough_target.IsLinked()) {
2035     __ Bind(&fallthrough_target);
2036   }
2037 }
2038 
2039 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
2040   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2041   // are set only strictly before `branch`. We can't use the eflags on long
2042   // conditions if they are materialized due to the complex branching.
2043   return cond->IsCondition() &&
2044          cond->GetNext() == branch &&
2045          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
2046 }
2047 
2048 template<class LabelType>
2049 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2050                                                            size_t condition_input_index,
2051                                                            LabelType* true_target,
2052                                                            LabelType* false_target) {
2053   HInstruction* cond = instruction->InputAt(condition_input_index);
2054 
2055   if (true_target == nullptr && false_target == nullptr) {
2056     // Nothing to do. The code always falls through.
2057     return;
2058   } else if (cond->IsIntConstant()) {
2059     // Constant condition, statically compared against "true" (integer value 1).
2060     if (cond->AsIntConstant()->IsTrue()) {
2061       if (true_target != nullptr) {
2062         __ jmp(true_target);
2063       }
2064     } else {
2065       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2066       if (false_target != nullptr) {
2067         __ jmp(false_target);
2068       }
2069     }
2070     return;
2071   }
2072 
2073   // The following code generates these patterns:
2074   //  (1) true_target == nullptr && false_target != nullptr
2075   //        - opposite condition true => branch to false_target
2076   //  (2) true_target != nullptr && false_target == nullptr
2077   //        - condition true => branch to true_target
2078   //  (3) true_target != nullptr && false_target != nullptr
2079   //        - condition true => branch to true_target
2080   //        - branch to false_target
2081   if (IsBooleanValueOrMaterializedCondition(cond)) {
2082     if (AreEflagsSetFrom(cond, instruction)) {
2083       if (true_target == nullptr) {
2084         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2085       } else {
2086         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2087       }
2088     } else {
2089       // Materialized condition, compare against 0.
2090       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2091       if (lhs.IsRegister()) {
2092         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2093       } else {
2094         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2095       }
2096       if (true_target == nullptr) {
2097         __ j(kEqual, false_target);
2098       } else {
2099         __ j(kNotEqual, true_target);
2100       }
2101     }
2102   } else {
2103     // Condition has not been materialized, use its inputs as the
2104     // comparison and its condition as the branch condition.
2105     HCondition* condition = cond->AsCondition();
2106 
2107     // If this is a long or FP comparison that has been folded into
2108     // the HCondition, generate the comparison directly.
2109     DataType::Type type = condition->InputAt(0)->GetType();
2110     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2111       GenerateCompareTestAndBranch(condition, true_target, false_target);
2112       return;
2113     }
2114 
2115     Location lhs = condition->GetLocations()->InAt(0);
2116     Location rhs = condition->GetLocations()->InAt(1);
2117     codegen_->GenerateIntCompare(lhs, rhs);
2118     if (true_target == nullptr) {
2119       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2120     } else {
2121       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2122     }
2123   }
2124 
2125   // If neither branch falls through (case 3), the conditional branch to `true_target`
2126   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2127   if (true_target != nullptr && false_target != nullptr) {
2128     __ jmp(false_target);
2129   }
2130 }
2131 
2132 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2133   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2134   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2135     locations->SetInAt(0, Location::Any());
2136   }
2137 }
2138 
2139 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2140   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2141   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2142   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2143       nullptr : codegen_->GetLabelOf(true_successor);
2144   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2145       nullptr : codegen_->GetLabelOf(false_successor);
2146   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2147 }
2148 
2149 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2150   LocationSummary* locations = new (GetGraph()->GetAllocator())
2151       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2152   InvokeRuntimeCallingConvention calling_convention;
2153   RegisterSet caller_saves = RegisterSet::Empty();
2154   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2155   locations->SetCustomSlowPathCallerSaves(caller_saves);
2156   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2157     locations->SetInAt(0, Location::Any());
2158   }
2159 }
2160 
2161 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2162   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2163   GenerateTestAndBranch<Label>(deoptimize,
2164                                /* condition_input_index= */ 0,
2165                                slow_path->GetEntryLabel(),
2166                                /* false_target= */ nullptr);
2167 }
2168 
2169 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2170   LocationSummary* locations = new (GetGraph()->GetAllocator())
2171       LocationSummary(flag, LocationSummary::kNoCall);
2172   locations->SetOut(Location::RequiresRegister());
2173 }
2174 
2175 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2176   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2177           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2178 }
2179 
2180 static bool SelectCanUseCMOV(HSelect* select) {
2181   // There are no conditional move instructions for XMMs.
2182   if (DataType::IsFloatingPointType(select->GetType())) {
2183     return false;
2184   }
2185 
2186   // A FP condition doesn't generate the single CC that we need.
2187   HInstruction* condition = select->GetCondition();
2188   if (condition->IsCondition() &&
2189       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2190     return false;
2191   }
2192 
2193   // We can generate a CMOV for this Select.
2194   return true;
2195 }
2196 
2197 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2198   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2199   if (DataType::IsFloatingPointType(select->GetType())) {
2200     locations->SetInAt(0, Location::RequiresFpuRegister());
2201     locations->SetInAt(1, Location::Any());
2202   } else {
2203     locations->SetInAt(0, Location::RequiresRegister());
2204     if (SelectCanUseCMOV(select)) {
2205       if (select->InputAt(1)->IsConstant()) {
2206         locations->SetInAt(1, Location::RequiresRegister());
2207       } else {
2208         locations->SetInAt(1, Location::Any());
2209       }
2210     } else {
2211       locations->SetInAt(1, Location::Any());
2212     }
2213   }
2214   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2215     locations->SetInAt(2, Location::RequiresRegister());
2216   }
2217   locations->SetOut(Location::SameAsFirstInput());
2218 }
2219 
2220 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2221   LocationSummary* locations = select->GetLocations();
2222   if (SelectCanUseCMOV(select)) {
2223     // If both the condition and the source types are integer, we can generate
2224     // a CMOV to implement Select.
2225     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2226     Location value_true_loc = locations->InAt(1);
2227     DCHECK(locations->InAt(0).Equals(locations->Out()));
2228 
2229     HInstruction* select_condition = select->GetCondition();
2230     Condition cond = kNotEqual;
2231 
2232     // Figure out how to test the 'condition'.
2233     if (select_condition->IsCondition()) {
2234       HCondition* condition = select_condition->AsCondition();
2235       if (!condition->IsEmittedAtUseSite()) {
2236         // This was a previously materialized condition.
2237         // Can we use the existing condition code?
2238         if (AreEflagsSetFrom(condition, select)) {
2239           // Materialization was the previous instruction.  Condition codes are right.
2240           cond = X86_64IntegerCondition(condition->GetCondition());
2241         } else {
2242           // No, we have to recreate the condition code.
2243           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2244           __ testl(cond_reg, cond_reg);
2245         }
2246       } else {
2247         GenerateCompareTest(condition);
2248         cond = X86_64IntegerCondition(condition->GetCondition());
2249       }
2250     } else {
2251       // Must be a Boolean condition, which needs to be compared to 0.
2252       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2253       __ testl(cond_reg, cond_reg);
2254     }
2255 
2256     // If the condition is true, overwrite the output, which already contains false.
2257     // Generate the correct sized CMOV.
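    // Illustrative: the output already holds the "false" value (SameAsFirstInput), so a
    // single cmovcc overwrites it with the "true" value only when the condition holds.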
2258     bool is_64_bit = DataType::Is64BitType(select->GetType());
2259     if (value_true_loc.IsRegister()) {
2260       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2261     } else {
2262       __ cmov(cond,
2263               value_false,
2264               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2265     }
2266   } else {
2267     NearLabel false_target;
2268     GenerateTestAndBranch<NearLabel>(select,
2269                                      /* condition_input_index= */ 2,
2270                                      /* true_target= */ nullptr,
2271                                      &false_target);
2272     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2273     __ Bind(&false_target);
2274   }
2275 }
2276 
2277 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2278   new (GetGraph()->GetAllocator()) LocationSummary(info);
2279 }
2280 
2281 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
2282   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2283 }
2284 
2285 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2286   __ subq(CpuRegister(RSP), Immediate(adjustment));
2287   __ cfi().AdjustCFAOffset(adjustment);
2288 }
2289 
2290 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2291   __ addq(CpuRegister(RSP), Immediate(adjustment));
2292   __ cfi().AdjustCFAOffset(-adjustment);
2293 }
2294 
2295 void CodeGeneratorX86_64::GenerateNop() {
2296   __ nop();
2297 }
2298 
2299 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2300   LocationSummary* locations =
2301       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2302   // Handle the long/FP comparisons made in instruction simplification.
2303   switch (cond->InputAt(0)->GetType()) {
2304     case DataType::Type::kInt64:
2305       locations->SetInAt(0, Location::RequiresRegister());
2306       locations->SetInAt(1, Location::Any());
2307       break;
2308     case DataType::Type::kFloat32:
2309     case DataType::Type::kFloat64:
2310       locations->SetInAt(0, Location::RequiresFpuRegister());
2311       locations->SetInAt(1, Location::Any());
2312       break;
2313     default:
2314       locations->SetInAt(0, Location::RequiresRegister());
2315       locations->SetInAt(1, Location::Any());
2316       break;
2317   }
2318   if (!cond->IsEmittedAtUseSite()) {
2319     locations->SetOut(Location::RequiresRegister());
2320   }
2321 }
2322 
2323 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2324   if (cond->IsEmittedAtUseSite()) {
2325     return;
2326   }
2327 
2328   LocationSummary* locations = cond->GetLocations();
2329   Location lhs = locations->InAt(0);
2330   Location rhs = locations->InAt(1);
2331   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2332   NearLabel true_label, false_label;
2333 
2334   switch (cond->InputAt(0)->GetType()) {
2335     default:
2336       // Integer case.
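      // A materialized integer condition lowers to roughly (sketch only):
      //   xorl  out, out
      //   cmpl  lhs, rhs        (via GenerateIntCompare; may be test/cmp-immediate)
      //   setcc <cond> out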
2337 
2338       // Clear output register: setcc only sets the low byte.
2339       __ xorl(reg, reg);
2340 
2341       codegen_->GenerateIntCompare(lhs, rhs);
2342       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2343       return;
2344     case DataType::Type::kInt64:
2345       // Clear output register: setcc only sets the low byte.
2346       __ xorl(reg, reg);
2347 
2348       codegen_->GenerateLongCompare(lhs, rhs);
2349       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2350       return;
2351     case DataType::Type::kFloat32: {
2352       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2353       if (rhs.IsConstant()) {
2354         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2355         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2356       } else if (rhs.IsStackSlot()) {
2357         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2358       } else {
2359         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2360       }
2361       GenerateFPJumps(cond, &true_label, &false_label);
2362       break;
2363     }
2364     case DataType::Type::kFloat64: {
2365       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2366       if (rhs.IsConstant()) {
2367         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2368         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2369       } else if (rhs.IsDoubleStackSlot()) {
2370         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2371       } else {
2372         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2373       }
2374       GenerateFPJumps(cond, &true_label, &false_label);
2375       break;
2376     }
2377   }
2378 
2379   // Convert the jumps into the result.
2380   NearLabel done_label;
2381 
2382   // False case: result = 0.
2383   __ Bind(&false_label);
2384   __ xorl(reg, reg);
2385   __ jmp(&done_label);
2386 
2387   // True case: result = 1.
2388   __ Bind(&true_label);
2389   __ movl(reg, Immediate(1));
2390   __ Bind(&done_label);
2391 }
2392 
2393 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2394   HandleCondition(comp);
2395 }
2396 
2397 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2398   HandleCondition(comp);
2399 }
2400 
2401 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2402   HandleCondition(comp);
2403 }
2404 
2405 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2406   HandleCondition(comp);
2407 }
2408 
2409 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2410   HandleCondition(comp);
2411 }
2412 
2413 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2414   HandleCondition(comp);
2415 }
2416 
2417 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2418   HandleCondition(comp);
2419 }
2420 
2421 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2422   HandleCondition(comp);
2423 }
2424 
2425 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2426   HandleCondition(comp);
2427 }
2428 
2429 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2430   HandleCondition(comp);
2431 }
2432 
2433 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2434   HandleCondition(comp);
2435 }
2436 
2437 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2438   HandleCondition(comp);
2439 }
2440 
2441 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2442   HandleCondition(comp);
2443 }
2444 
2445 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2446   HandleCondition(comp);
2447 }
2448 
2449 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2450   HandleCondition(comp);
2451 }
2452 
2453 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2454   HandleCondition(comp);
2455 }
2456 
2457 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2458   HandleCondition(comp);
2459 }
2460 
2461 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2462   HandleCondition(comp);
2463 }
2464 
2465 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2466   HandleCondition(comp);
2467 }
2468 
2469 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2470   HandleCondition(comp);
2471 }
2472 
2473 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2474   LocationSummary* locations =
2475       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2476   switch (compare->InputAt(0)->GetType()) {
2477     case DataType::Type::kBool:
2478     case DataType::Type::kUint8:
2479     case DataType::Type::kInt8:
2480     case DataType::Type::kUint16:
2481     case DataType::Type::kInt16:
2482     case DataType::Type::kInt32:
2483     case DataType::Type::kInt64: {
2484       locations->SetInAt(0, Location::RequiresRegister());
2485       locations->SetInAt(1, Location::Any());
2486       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2487       break;
2488     }
2489     case DataType::Type::kFloat32:
2490     case DataType::Type::kFloat64: {
2491       locations->SetInAt(0, Location::RequiresFpuRegister());
2492       locations->SetInAt(1, Location::Any());
2493       locations->SetOut(Location::RequiresRegister());
2494       break;
2495     }
2496     default:
2497       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2498   }
2499 }
2500 
2501 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2502   LocationSummary* locations = compare->GetLocations();
2503   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2504   Location left = locations->InAt(0);
2505   Location right = locations->InAt(1);
2506 
2507   NearLabel less, greater, done;
2508   DataType::Type type = compare->InputAt(0)->GetType();
2509   Condition less_cond = kLess;
2510 
2511   switch (type) {
2512     case DataType::Type::kBool:
2513     case DataType::Type::kUint8:
2514     case DataType::Type::kInt8:
2515     case DataType::Type::kUint16:
2516     case DataType::Type::kInt16:
2517     case DataType::Type::kInt32: {
2518       codegen_->GenerateIntCompare(left, right);
2519       break;
2520     }
2521     case DataType::Type::kInt64: {
2522       codegen_->GenerateLongCompare(left, right);
2523       break;
2524     }
2525     case DataType::Type::kFloat32: {
2526       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2527       if (right.IsConstant()) {
2528         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2529         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2530       } else if (right.IsStackSlot()) {
2531         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2532       } else {
2533         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2534       }
2535       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2536       less_cond = kBelow;  //  ucomis{s,d} sets CF
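      // ucomiss/ucomisd set ZF:PF:CF to 0:0:0 (greater), 0:0:1 (less), 1:0:0 (equal)
      // and 1:1:1 (unordered), so kBelow selects the "less" outcome while the
      // kUnordered jump above routes NaN according to the compare's bias.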
2537       break;
2538     }
2539     case DataType::Type::kFloat64: {
2540       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2541       if (right.IsConstant()) {
2542         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2543         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2544       } else if (right.IsDoubleStackSlot()) {
2545         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2546       } else {
2547         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2548       }
2549       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2550       less_cond = kBelow;  //  ucomis{s,d} sets CF
2551       break;
2552     }
2553     default:
2554       LOG(FATAL) << "Unexpected compare type " << type;
2555   }
2556 
2557   __ movl(out, Immediate(0));
2558   __ j(kEqual, &done);
2559   __ j(less_cond, &less);
2560 
2561   __ Bind(&greater);
2562   __ movl(out, Immediate(1));
2563   __ jmp(&done);
2564 
2565   __ Bind(&less);
2566   __ movl(out, Immediate(-1));
2567 
2568   __ Bind(&done);
2569 }
2570 
2571 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2572   LocationSummary* locations =
2573       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2574   locations->SetOut(Location::ConstantLocation(constant));
2575 }
2576 
2577 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2578   // Will be generated at use site.
2579 }
2580 
2581 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2582   LocationSummary* locations =
2583       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2584   locations->SetOut(Location::ConstantLocation(constant));
2585 }
2586 
2587 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2588   // Will be generated at use site.
2589 }
2590 
2591 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2592   LocationSummary* locations =
2593       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2594   locations->SetOut(Location::ConstantLocation(constant));
2595 }
2596 
2597 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2598   // Will be generated at use site.
2599 }
2600 
2601 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2602   LocationSummary* locations =
2603       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2604   locations->SetOut(Location::ConstantLocation(constant));
2605 }
2606 
2607 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2608   // Will be generated at use site.
2609 }
2610 
2611 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2612   LocationSummary* locations =
2613       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2614   locations->SetOut(Location::ConstantLocation(constant));
2615 }
2616 
2617 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2618     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2619   // Will be generated at use site.
2620 }
2621 
2622 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2623   constructor_fence->SetLocations(nullptr);
2624 }
2625 
2626 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2627     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2628   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2629 }
2630 
2631 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2632   memory_barrier->SetLocations(nullptr);
2633 }
2634 
2635 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2636   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2637 }
2638 
2639 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2640   ret->SetLocations(nullptr);
2641 }
2642 
2643 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2644   codegen_->GenerateFrameExit();
2645 }
2646 
2647 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2648   LocationSummary* locations =
2649       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2650   SetInForReturnValue(ret, locations);
2651 }
2652 
2653 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2654   switch (ret->InputAt(0)->GetType()) {
2655     case DataType::Type::kReference:
2656     case DataType::Type::kBool:
2657     case DataType::Type::kUint8:
2658     case DataType::Type::kInt8:
2659     case DataType::Type::kUint16:
2660     case DataType::Type::kInt16:
2661     case DataType::Type::kInt32:
2662     case DataType::Type::kInt64:
2663       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2664       break;
2665 
2666     case DataType::Type::kFloat32: {
2667       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2668                 XMM0);
2669       // To simplify callers of an OSR method, we put the return value in both
2670       // floating point and core register.
2671       if (GetGraph()->IsCompilingOsr()) {
2672         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2673       }
2674       break;
2675     }
2676     case DataType::Type::kFloat64: {
2677       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2678                 XMM0);
2679       // To simplify callers of an OSR method, we put the return value in both
2680       // floating point and core register.
2681       if (GetGraph()->IsCompilingOsr()) {
2682         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2683       }
2684       break;
2685     }
2686 
2687     default:
2688       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2689   }
2690   codegen_->GenerateFrameExit();
2691 }
2692 
2693 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2694   switch (type) {
2695     case DataType::Type::kReference:
2696     case DataType::Type::kBool:
2697     case DataType::Type::kUint8:
2698     case DataType::Type::kInt8:
2699     case DataType::Type::kUint16:
2700     case DataType::Type::kInt16:
2701     case DataType::Type::kUint32:
2702     case DataType::Type::kInt32:
2703     case DataType::Type::kUint64:
2704     case DataType::Type::kInt64:
2705       return Location::RegisterLocation(RAX);
2706 
2707     case DataType::Type::kVoid:
2708       return Location::NoLocation();
2709 
2710     case DataType::Type::kFloat64:
2711     case DataType::Type::kFloat32:
2712       return Location::FpuRegisterLocation(XMM0);
2713   }
2714 
2715   UNREACHABLE();
2716 }
2717 
2718 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2719   return Location::RegisterLocation(kMethodRegisterArgument);
2720 }
2721 
2722 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
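  // For reference, assuming the usual x86-64 parameter register tables: the
  // ArtMethod* is in RDI, core arguments come from {RSI, RDX, RCX, R8, R9} and FP
  // arguments from XMM0..XMM7, so e.g. (int, long, float, double, Object) lands in
  // RSI, RDX, XMM0, XMM1, RCX; everything else falls through to stack slots.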
2723   switch (type) {
2724     case DataType::Type::kReference:
2725     case DataType::Type::kBool:
2726     case DataType::Type::kUint8:
2727     case DataType::Type::kInt8:
2728     case DataType::Type::kUint16:
2729     case DataType::Type::kInt16:
2730     case DataType::Type::kInt32: {
2731       uint32_t index = gp_index_++;
2732       stack_index_++;
2733       if (index < calling_convention.GetNumberOfRegisters()) {
2734         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2735       } else {
2736         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2737       }
2738     }
2739 
2740     case DataType::Type::kInt64: {
2741       uint32_t index = gp_index_;
2742       stack_index_ += 2;
2743       if (index < calling_convention.GetNumberOfRegisters()) {
2744         gp_index_ += 1;
2745         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2746       } else {
2747         gp_index_ += 2;
2748         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2749       }
2750     }
2751 
2752     case DataType::Type::kFloat32: {
2753       uint32_t index = float_index_++;
2754       stack_index_++;
2755       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2756         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2757       } else {
2758         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2759       }
2760     }
2761 
2762     case DataType::Type::kFloat64: {
2763       uint32_t index = float_index_++;
2764       stack_index_ += 2;
2765       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2766         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2767       } else {
2768         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2769       }
2770     }
2771 
2772     case DataType::Type::kUint32:
2773     case DataType::Type::kUint64:
2774     case DataType::Type::kVoid:
2775       LOG(FATAL) << "Unexpected parameter type " << type;
2776       UNREACHABLE();
2777   }
2778   return Location::NoLocation();
2779 }
2780 
2781 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2782   DCHECK_NE(type, DataType::Type::kReference);
2783 
2784   Location location = Location::NoLocation();
2785   if (DataType::IsFloatingPointType(type)) {
2786     if (fpr_index_ < kParameterFloatRegistersLength) {
2787       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2788       ++fpr_index_;
2789     }
2790   } else {
2791     // Native ABI uses the same registers as managed, except that the method register RDI
2792     // is a normal argument.
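    // With that adjustment the effective GPR order is RDI, RSI, RDX, RCX, R8, R9,
    // matching the native SysV AMD64 ABI (assuming the usual parameter register
    // tables), with FP arguments in XMM0..XMM7 and the rest in 8-byte stack slots.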
2793     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2794       location = Location::RegisterLocation(
2795           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2796       ++gpr_index_;
2797     }
2798   }
2799   if (location.IsInvalid()) {
2800     if (DataType::Is64BitType(type)) {
2801       location = Location::DoubleStackSlot(stack_offset_);
2802     } else {
2803       location = Location::StackSlot(stack_offset_);
2804     }
2805     stack_offset_ += kFramePointerSize;
2806 
2807     if (for_register_allocation_) {
2808       location = Location::Any();
2809     }
2810   }
2811   return location;
2812 }
2813 
2814 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2815     const {
2816   // We perform conversion to the managed ABI return register after the call if needed.
2817   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2818   return dex_calling_convention.GetReturnLocation(type);
2819 }
2820 
2821 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2822   // Pass the method in the hidden argument RAX.
2823   return Location::RegisterLocation(RAX);
2824 }
2825 
2826 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2827   // The trampoline uses the same calling convention as the dex calling convention,
2828   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
2829   // contain the method_idx.
2830   HandleInvoke(invoke);
2831 }
2832 
2833 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2834   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2835 }
2836 
2837 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2838   // Explicit clinit checks triggered by static invokes must have been pruned by
2839   // art::PrepareForRegisterAllocation.
2840   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2841 
2842   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2843   if (intrinsic.TryDispatch(invoke)) {
2844     return;
2845   }
2846 
2847   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2848     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2849         /*for_register_allocation=*/ true);
2850     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2851     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2852   } else {
2853     HandleInvoke(invoke);
2854   }
2855 }
2856 
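// Intrinsics are handled in two steps: the locations builder's TryDispatch marks the
// LocationSummary as Intrinsified(), and this helper then emits the intrinsic
// expansion at code-generation time instead of a regular call.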
2857 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2858   if (invoke->GetLocations()->Intrinsified()) {
2859     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2860     intrinsic.Dispatch(invoke);
2861     return true;
2862   }
2863   return false;
2864 }
2865 
2866 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2867   // Explicit clinit checks triggered by static invokes must have been pruned by
2868   // art::PrepareForRegisterAllocation.
2869   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2870 
2871   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2872     return;
2873   }
2874 
2875   LocationSummary* locations = invoke->GetLocations();
2876   codegen_->GenerateStaticOrDirectCall(
2877       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2878 }
2879 
2880 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2881   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2882   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2883 }
2884 
2885 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2886   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2887   if (intrinsic.TryDispatch(invoke)) {
2888     return;
2889   }
2890 
2891   HandleInvoke(invoke);
2892 }
2893 
2894 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2895   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2896     return;
2897   }
2898 
2899   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2900   DCHECK(!codegen_->IsLeafMethod());
2901 }
2902 
2903 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2904   HandleInvoke(invoke);
2905   // Add the hidden argument.
2906   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2907     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2908                                     Location::RegisterLocation(RAX));
2909   }
2910   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2911 }
2912 
2913 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2914                                                         CpuRegister klass) {
2915   DCHECK_EQ(RDI, klass.AsRegister());
2916   // We know the destination of an intrinsic, so no need to record inline
2917   // caches.
2918   if (!instruction->GetLocations()->Intrinsified() &&
2919       GetGraph()->IsCompilingBaseline() &&
2920       !Runtime::Current()->IsAotCompiler()) {
2921     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2922     DCHECK(info != nullptr);
2923     InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2924     uint64_t address = reinterpret_cast64<uint64_t>(cache);
2925     NearLabel done;
2926     __ movq(CpuRegister(TMP), Immediate(address));
2927     // Fast path for a monomorphic cache.
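    // If the receiver's class equals the first class recorded in the InlineCache we
    // fall through to the call; otherwise the kQuickUpdateInlineCache entrypoint
    // records the new class for this dex pc.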
2928     __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2929     __ j(kEqual, &done);
2930     GenerateInvokeRuntime(
2931         GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2932     __ Bind(&done);
2933   }
2934 }
2935 
2936 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2937   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
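  // In outline: load the receiver's class, load its ImTable pointer, index it with
  // this invoke's IMT slot, and call the resulting ArtMethod's quick entrypoint.
  // A conflicting slot holds a resolution stub, which is why the interface method
  // may be passed in RAX as a hidden argument.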
2938   LocationSummary* locations = invoke->GetLocations();
2939   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2940   Location receiver = locations->InAt(0);
2941   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2942 
2943   if (receiver.IsStackSlot()) {
2944     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2945     // /* HeapReference<Class> */ temp = temp->klass_
2946     __ movl(temp, Address(temp, class_offset));
2947   } else {
2948     // /* HeapReference<Class> */ temp = receiver->klass_
2949     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2950   }
2951   codegen_->MaybeRecordImplicitNullCheck(invoke);
2952   // Instead of simply (possibly) unpoisoning `temp` here, we should
2953   // emit a read barrier for the previous class reference load.
2954   // However this is not required in practice, as this is an
2955   // intermediate/temporary reference and because the current
2956   // concurrent copying collector keeps the from-space memory
2957   // intact/accessible until the end of the marking phase (future
2958   // collectors may not preserve this).
2959   __ MaybeUnpoisonHeapReference(temp);
2960 
2961   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2962 
2963   if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
2964       invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2965     Location hidden_reg = locations->GetTemp(1);
2966     // Set the hidden argument. It is safe to do this here, as RAX
2967     // won't be modified thereafter, before the `call` instruction.
2968     // We also do it after MaybeGenerateInlineCache that may use RAX.
2969     DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
2970     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
2971   }
2972 
2973   // temp = temp->GetAddressOfIMT()
2974   __ movq(temp,
2975       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2977   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2978       invoke->GetImtIndex(), kX86_64PointerSize));
2979   // temp = temp->GetImtEntryAt(method_offset);
2980   __ movq(temp, Address(temp, method_offset));
2981   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2982     // We pass the method from the IMT in case of a conflict. This will ensure
2983     // we go into the runtime to resolve the actual method.
2984     Location hidden_reg = locations->GetTemp(1);
2985     __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
2986   }
2987   // call temp->GetEntryPoint();
2988   __ call(Address(
2989       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2990 
2991   DCHECK(!codegen_->IsLeafMethod());
2992   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2993 }
2994 
2995 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2996   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2997   if (intrinsic.TryDispatch(invoke)) {
2998     return;
2999   }
3000   HandleInvoke(invoke);
3001 }
3002 
3003 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3004   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3005     return;
3006   }
3007   codegen_->GenerateInvokePolymorphicCall(invoke);
3008 }
3009 
3010 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3011   HandleInvoke(invoke);
3012 }
3013 
3014 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3015   codegen_->GenerateInvokeCustomCall(invoke);
3016 }
3017 
3018 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3019   LocationSummary* locations =
3020       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3021   switch (neg->GetResultType()) {
3022     case DataType::Type::kInt32:
3023     case DataType::Type::kInt64:
3024       locations->SetInAt(0, Location::RequiresRegister());
3025       locations->SetOut(Location::SameAsFirstInput());
3026       break;
3027 
3028     case DataType::Type::kFloat32:
3029     case DataType::Type::kFloat64:
3030       locations->SetInAt(0, Location::RequiresFpuRegister());
3031       locations->SetOut(Location::SameAsFirstInput());
3032       locations->AddTemp(Location::RequiresFpuRegister());
3033       break;
3034 
3035     default:
3036       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3037   }
3038 }
3039 
3040 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3041   LocationSummary* locations = neg->GetLocations();
3042   Location out = locations->Out();
3043   Location in = locations->InAt(0);
3044   switch (neg->GetResultType()) {
3045     case DataType::Type::kInt32:
3046       DCHECK(in.IsRegister());
3047       DCHECK(in.Equals(out));
3048       __ negl(out.AsRegister<CpuRegister>());
3049       break;
3050 
3051     case DataType::Type::kInt64:
3052       DCHECK(in.IsRegister());
3053       DCHECK(in.Equals(out));
3054       __ negq(out.AsRegister<CpuRegister>());
3055       break;
3056 
3057     case DataType::Type::kFloat32: {
3058       DCHECK(in.Equals(out));
3059       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3060       // Implement float negation with an exclusive or with value
3061       // 0x80000000 (mask for bit 31, representing the sign of a
3062       // single-precision floating-point number).
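      // Worked example: 1.0f is 0x3F800000, and 0x3F800000 ^ 0x80000000 is
      // 0xBF800000, i.e. -1.0f. Only the sign bit changes, so zeros and NaNs are
      // negated correctly as well.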
3063       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3064       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3065       break;
3066     }
3067 
3068     case DataType::Type::kFloat64: {
3069       DCHECK(in.Equals(out));
3070       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3071       // Implement double negation with an exclusive or with value
3072       // 0x8000000000000000 (mask for bit 63, representing the sign of
3073       // a double-precision floating-point number).
3074       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3075       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3076       break;
3077     }
3078 
3079     default:
3080       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3081   }
3082 }
3083 
3084 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3085   LocationSummary* locations =
3086       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3087   DataType::Type result_type = conversion->GetResultType();
3088   DataType::Type input_type = conversion->GetInputType();
3089   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3090       << input_type << " -> " << result_type;
3091 
3092   switch (result_type) {
3093     case DataType::Type::kUint8:
3094     case DataType::Type::kInt8:
3095     case DataType::Type::kUint16:
3096     case DataType::Type::kInt16:
3097       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3098       locations->SetInAt(0, Location::Any());
3099       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3100       break;
3101 
3102     case DataType::Type::kInt32:
3103       switch (input_type) {
3104         case DataType::Type::kInt64:
3105           locations->SetInAt(0, Location::Any());
3106           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3107           break;
3108 
3109         case DataType::Type::kFloat32:
3110           locations->SetInAt(0, Location::RequiresFpuRegister());
3111           locations->SetOut(Location::RequiresRegister());
3112           break;
3113 
3114         case DataType::Type::kFloat64:
3115           locations->SetInAt(0, Location::RequiresFpuRegister());
3116           locations->SetOut(Location::RequiresRegister());
3117           break;
3118 
3119         default:
3120           LOG(FATAL) << "Unexpected type conversion from " << input_type
3121                      << " to " << result_type;
3122       }
3123       break;
3124 
3125     case DataType::Type::kInt64:
3126       switch (input_type) {
3127         case DataType::Type::kBool:
3128         case DataType::Type::kUint8:
3129         case DataType::Type::kInt8:
3130         case DataType::Type::kUint16:
3131         case DataType::Type::kInt16:
3132         case DataType::Type::kInt32:
3133           // TODO: We would benefit from a (to-be-implemented)
3134           // Location::RegisterOrStackSlot requirement for this input.
3135           locations->SetInAt(0, Location::RequiresRegister());
3136           locations->SetOut(Location::RequiresRegister());
3137           break;
3138 
3139         case DataType::Type::kFloat32:
3140           locations->SetInAt(0, Location::RequiresFpuRegister());
3141           locations->SetOut(Location::RequiresRegister());
3142           break;
3143 
3144         case DataType::Type::kFloat64:
3145           locations->SetInAt(0, Location::RequiresFpuRegister());
3146           locations->SetOut(Location::RequiresRegister());
3147           break;
3148 
3149         default:
3150           LOG(FATAL) << "Unexpected type conversion from " << input_type
3151                      << " to " << result_type;
3152       }
3153       break;
3154 
3155     case DataType::Type::kFloat32:
3156       switch (input_type) {
3157         case DataType::Type::kBool:
3158         case DataType::Type::kUint8:
3159         case DataType::Type::kInt8:
3160         case DataType::Type::kUint16:
3161         case DataType::Type::kInt16:
3162         case DataType::Type::kInt32:
3163           locations->SetInAt(0, Location::Any());
3164           locations->SetOut(Location::RequiresFpuRegister());
3165           break;
3166 
3167         case DataType::Type::kInt64:
3168           locations->SetInAt(0, Location::Any());
3169           locations->SetOut(Location::RequiresFpuRegister());
3170           break;
3171 
3172         case DataType::Type::kFloat64:
3173           locations->SetInAt(0, Location::Any());
3174           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3175           break;
3176 
3177         default:
3178           LOG(FATAL) << "Unexpected type conversion from " << input_type
3179                      << " to " << result_type;
3180       }
3181       break;
3182 
3183     case DataType::Type::kFloat64:
3184       switch (input_type) {
3185         case DataType::Type::kBool:
3186         case DataType::Type::kUint8:
3187         case DataType::Type::kInt8:
3188         case DataType::Type::kUint16:
3189         case DataType::Type::kInt16:
3190         case DataType::Type::kInt32:
3191           locations->SetInAt(0, Location::Any());
3192           locations->SetOut(Location::RequiresFpuRegister());
3193           break;
3194 
3195         case DataType::Type::kInt64:
3196           locations->SetInAt(0, Location::Any());
3197           locations->SetOut(Location::RequiresFpuRegister());
3198           break;
3199 
3200         case DataType::Type::kFloat32:
3201           locations->SetInAt(0, Location::Any());
3202           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3203           break;
3204 
3205         default:
3206           LOG(FATAL) << "Unexpected type conversion from " << input_type
3207                      << " to " << result_type;
3208       }
3209       break;
3210 
3211     default:
3212       LOG(FATAL) << "Unexpected type conversion from " << input_type
3213                  << " to " << result_type;
3214   }
3215 }
3216 
3217 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3218   LocationSummary* locations = conversion->GetLocations();
3219   Location out = locations->Out();
3220   Location in = locations->InAt(0);
3221   DataType::Type result_type = conversion->GetResultType();
3222   DataType::Type input_type = conversion->GetInputType();
3223   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3224       << input_type << " -> " << result_type;
3225   switch (result_type) {
3226     case DataType::Type::kUint8:
3227       switch (input_type) {
3228         case DataType::Type::kInt8:
3229         case DataType::Type::kUint16:
3230         case DataType::Type::kInt16:
3231         case DataType::Type::kInt32:
3232         case DataType::Type::kInt64:
3233           if (in.IsRegister()) {
3234             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3235           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3236             __ movzxb(out.AsRegister<CpuRegister>(),
3237                       Address(CpuRegister(RSP), in.GetStackIndex()));
3238           } else {
3239             __ movl(out.AsRegister<CpuRegister>(),
3240                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3241           }
3242           break;
3243 
3244         default:
3245           LOG(FATAL) << "Unexpected type conversion from " << input_type
3246                      << " to " << result_type;
3247       }
3248       break;
3249 
3250     case DataType::Type::kInt8:
3251       switch (input_type) {
3252         case DataType::Type::kUint8:
3253         case DataType::Type::kUint16:
3254         case DataType::Type::kInt16:
3255         case DataType::Type::kInt32:
3256         case DataType::Type::kInt64:
3257           if (in.IsRegister()) {
3258             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3259           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3260             __ movsxb(out.AsRegister<CpuRegister>(),
3261                       Address(CpuRegister(RSP), in.GetStackIndex()));
3262           } else {
3263             __ movl(out.AsRegister<CpuRegister>(),
3264                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3265           }
3266           break;
3267 
3268         default:
3269           LOG(FATAL) << "Unexpected type conversion from " << input_type
3270                      << " to " << result_type;
3271       }
3272       break;
3273 
3274     case DataType::Type::kUint16:
3275       switch (input_type) {
3276         case DataType::Type::kInt8:
3277         case DataType::Type::kInt16:
3278         case DataType::Type::kInt32:
3279         case DataType::Type::kInt64:
3280           if (in.IsRegister()) {
3281             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3282           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3283             __ movzxw(out.AsRegister<CpuRegister>(),
3284                       Address(CpuRegister(RSP), in.GetStackIndex()));
3285           } else {
3286             __ movl(out.AsRegister<CpuRegister>(),
3287                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3288           }
3289           break;
3290 
3291         default:
3292           LOG(FATAL) << "Unexpected type conversion from " << input_type
3293                      << " to " << result_type;
3294       }
3295       break;
3296 
3297     case DataType::Type::kInt16:
3298       switch (input_type) {
3299         case DataType::Type::kUint16:
3300         case DataType::Type::kInt32:
3301         case DataType::Type::kInt64:
3302           if (in.IsRegister()) {
3303             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3304           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3305             __ movsxw(out.AsRegister<CpuRegister>(),
3306                       Address(CpuRegister(RSP), in.GetStackIndex()));
3307           } else {
3308             __ movl(out.AsRegister<CpuRegister>(),
3309                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3310           }
3311           break;
3312 
3313         default:
3314           LOG(FATAL) << "Unexpected type conversion from " << input_type
3315                      << " to " << result_type;
3316       }
3317       break;
3318 
3319     case DataType::Type::kInt32:
3320       switch (input_type) {
3321         case DataType::Type::kInt64:
3322           if (in.IsRegister()) {
3323             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3324           } else if (in.IsDoubleStackSlot()) {
3325             __ movl(out.AsRegister<CpuRegister>(),
3326                     Address(CpuRegister(RSP), in.GetStackIndex()));
3327           } else {
3328             DCHECK(in.IsConstant());
3329             DCHECK(in.GetConstant()->IsLongConstant());
3330             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3331             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3332           }
3333           break;
3334 
3335         case DataType::Type::kFloat32: {
3336           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3337           CpuRegister output = out.AsRegister<CpuRegister>();
3338           NearLabel done, nan;
3339 
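          // cvttss2si yields the x86 "integer indefinite" value 0x80000000 for NaN
          // and out-of-range inputs, while Java requires NaN -> 0 and saturation to
          // Integer.MAX_VALUE / Integer.MIN_VALUE. The checks below handle NaN and
          // positive overflow; negative overflow already produces 0x80000000, which
          // is Integer.MIN_VALUE.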
3340           __ movl(output, Immediate(kPrimIntMax));
3341           // if input >= (float)INT_MAX goto done
3342           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3343           __ j(kAboveEqual, &done);
3344           // if input == NaN goto nan
3345           __ j(kUnordered, &nan);
3346           // output = float-to-int-truncate(input)
3347           __ cvttss2si(output, input, false);
3348           __ jmp(&done);
3349           __ Bind(&nan);
3350           //  output = 0
3351           __ xorl(output, output);
3352           __ Bind(&done);
3353           break;
3354         }
3355 
3356         case DataType::Type::kFloat64: {
3357           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3358           CpuRegister output = out.AsRegister<CpuRegister>();
3359           NearLabel done, nan;
3360 
3361           __ movl(output, Immediate(kPrimIntMax));
3362           // if input >= (double)INT_MAX goto done
3363           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3364           __ j(kAboveEqual, &done);
3365           // if input == NaN goto nan
3366           __ j(kUnordered, &nan);
3367           // output = double-to-int-truncate(input)
3368           __ cvttsd2si(output, input);
3369           __ jmp(&done);
3370           __ Bind(&nan);
3371           //  output = 0
3372           __ xorl(output, output);
3373           __ Bind(&done);
3374           break;
3375         }
3376 
3377         default:
3378           LOG(FATAL) << "Unexpected type conversion from " << input_type
3379                      << " to " << result_type;
3380       }
3381       break;
3382 
3383     case DataType::Type::kInt64:
3384       DCHECK(out.IsRegister());
3385       switch (input_type) {
3386         case DataType::Type::kBool:
3387         case DataType::Type::kUint8:
3388         case DataType::Type::kInt8:
3389         case DataType::Type::kUint16:
3390         case DataType::Type::kInt16:
3391         case DataType::Type::kInt32:
3392           DCHECK(in.IsRegister());
3393           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3394           break;
3395 
3396         case DataType::Type::kFloat32: {
3397           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3398           CpuRegister output = out.AsRegister<CpuRegister>();
3399           NearLabel done, nan;
3400 
3401           codegen_->Load64BitValue(output, kPrimLongMax);
3402           // if input >= (float)LONG_MAX goto done
3403           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3404           __ j(kAboveEqual, &done);
3405           // if input == NaN goto nan
3406           __ j(kUnordered, &nan);
3407           // output = float-to-long-truncate(input)
3408           __ cvttss2si(output, input, true);
3409           __ jmp(&done);
3410           __ Bind(&nan);
3411           //  output = 0
3412           __ xorl(output, output);
3413           __ Bind(&done);
3414           break;
3415         }
3416 
3417         case DataType::Type::kFloat64: {
3418           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3419           CpuRegister output = out.AsRegister<CpuRegister>();
3420           NearLabel done, nan;
3421 
3422           codegen_->Load64BitValue(output, kPrimLongMax);
3423           // if input >= (double)LONG_MAX goto done
3424           __ comisd(input, codegen_->LiteralDoubleAddress(
3425                 static_cast<double>(kPrimLongMax)));
3426           __ j(kAboveEqual, &done);
3427           // if input == NaN goto nan
3428           __ j(kUnordered, &nan);
3429           // output = double-to-long-truncate(input)
3430           __ cvttsd2si(output, input, true);
3431           __ jmp(&done);
3432           __ Bind(&nan);
3433           //  output = 0
3434           __ xorl(output, output);
3435           __ Bind(&done);
3436           break;
3437         }
3438 
3439         default:
3440           LOG(FATAL) << "Unexpected type conversion from " << input_type
3441                      << " to " << result_type;
3442       }
3443       break;
3444 
3445     case DataType::Type::kFloat32:
3446       switch (input_type) {
3447         case DataType::Type::kBool:
3448         case DataType::Type::kUint8:
3449         case DataType::Type::kInt8:
3450         case DataType::Type::kUint16:
3451         case DataType::Type::kInt16:
3452         case DataType::Type::kInt32:
3453           if (in.IsRegister()) {
3454             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3455           } else if (in.IsConstant()) {
3456             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3457             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3458             codegen_->Load32BitValue(dest, static_cast<float>(v));
3459           } else {
3460             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3461                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3462           }
3463           break;
3464 
3465         case DataType::Type::kInt64:
3466           if (in.IsRegister()) {
3467             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3468           } else if (in.IsConstant()) {
3469             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3470             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3471             codegen_->Load32BitValue(dest, static_cast<float>(v));
3472           } else {
3473             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3474                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3475           }
3476           break;
3477 
3478         case DataType::Type::kFloat64:
3479           if (in.IsFpuRegister()) {
3480             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3481           } else if (in.IsConstant()) {
3482             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3483             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3484             codegen_->Load32BitValue(dest, static_cast<float>(v));
3485           } else {
3486             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3487                         Address(CpuRegister(RSP), in.GetStackIndex()));
3488           }
3489           break;
3490 
3491         default:
3492           LOG(FATAL) << "Unexpected type conversion from " << input_type
3493                      << " to " << result_type;
3494       }
3495       break;
3496 
3497     case DataType::Type::kFloat64:
3498       switch (input_type) {
3499         case DataType::Type::kBool:
3500         case DataType::Type::kUint8:
3501         case DataType::Type::kInt8:
3502         case DataType::Type::kUint16:
3503         case DataType::Type::kInt16:
3504         case DataType::Type::kInt32:
3505           if (in.IsRegister()) {
3506             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3507           } else if (in.IsConstant()) {
3508             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3509             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3510             codegen_->Load64BitValue(dest, static_cast<double>(v));
3511           } else {
3512             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3513                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3514           }
3515           break;
3516 
3517         case DataType::Type::kInt64:
3518           if (in.IsRegister()) {
3519             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3520           } else if (in.IsConstant()) {
3521             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3522             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3523             codegen_->Load64BitValue(dest, static_cast<double>(v));
3524           } else {
3525             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3526                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3527           }
3528           break;
3529 
3530         case DataType::Type::kFloat32:
3531           if (in.IsFpuRegister()) {
3532             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3533           } else if (in.IsConstant()) {
3534             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3535             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3536             codegen_->Load64BitValue(dest, static_cast<double>(v));
3537           } else {
3538             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3539                         Address(CpuRegister(RSP), in.GetStackIndex()));
3540           }
3541           break;
3542 
3543         default:
3544           LOG(FATAL) << "Unexpected type conversion from " << input_type
3545                      << " to " << result_type;
3546       }
3547       break;
3548 
3549     default:
3550       LOG(FATAL) << "Unexpected type conversion from " << input_type
3551                  << " to " << result_type;
3552   }
3553 }
3554 
3555 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3556   LocationSummary* locations =
3557       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3558   switch (add->GetResultType()) {
3559     case DataType::Type::kInt32: {
3560       locations->SetInAt(0, Location::RequiresRegister());
3561       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3562       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3563       break;
3564     }
3565 
3566     case DataType::Type::kInt64: {
3567       locations->SetInAt(0, Location::RequiresRegister());
3568       // We can use a leaq or addq if the constant can fit in an immediate.
3569       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3570       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3571       break;
3572     }
3573 
3574     case DataType::Type::kFloat64:
3575     case DataType::Type::kFloat32: {
3576       locations->SetInAt(0, Location::RequiresFpuRegister());
3577       locations->SetInAt(1, Location::Any());
3578       locations->SetOut(Location::SameAsFirstInput());
3579       break;
3580     }
3581 
3582     default:
3583       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3584   }
3585 }
3586 
3587 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3588   LocationSummary* locations = add->GetLocations();
3589   Location first = locations->InAt(0);
3590   Location second = locations->InAt(1);
3591   Location out = locations->Out();
3592 
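  // For integer adds the output may differ from both inputs (kNoOutputOverlap in the
  // locations builder); in that case leal/leaq act as a three-operand add that leaves
  // both inputs and EFLAGS untouched, instead of a two-operand addl/addq.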
3593   switch (add->GetResultType()) {
3594     case DataType::Type::kInt32: {
3595       if (second.IsRegister()) {
3596         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3597           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3598         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3599           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3600         } else {
3601           __ leal(out.AsRegister<CpuRegister>(), Address(
3602               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3603         }
3604       } else if (second.IsConstant()) {
3605         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3606           __ addl(out.AsRegister<CpuRegister>(),
3607                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3608         } else {
3609           __ leal(out.AsRegister<CpuRegister>(), Address(
3610               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3611         }
3612       } else {
3613         DCHECK(first.Equals(locations->Out()));
3614         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3615       }
3616       break;
3617     }
3618 
3619     case DataType::Type::kInt64: {
3620       if (second.IsRegister()) {
3621         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3622           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3623         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3624           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3625         } else {
3626           __ leaq(out.AsRegister<CpuRegister>(), Address(
3627               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3628         }
3629       } else {
3630         DCHECK(second.IsConstant());
3631         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3632         int32_t int32_value = Low32Bits(value);
3633         DCHECK_EQ(int32_value, value);
3634         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3635           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3636         } else {
3637           __ leaq(out.AsRegister<CpuRegister>(), Address(
3638               first.AsRegister<CpuRegister>(), int32_value));
3639         }
3640       }
3641       break;
3642     }
3643 
3644     case DataType::Type::kFloat32: {
3645       if (second.IsFpuRegister()) {
3646         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3647       } else if (second.IsConstant()) {
3648         __ addss(first.AsFpuRegister<XmmRegister>(),
3649                  codegen_->LiteralFloatAddress(
3650                      second.GetConstant()->AsFloatConstant()->GetValue()));
3651       } else {
3652         DCHECK(second.IsStackSlot());
3653         __ addss(first.AsFpuRegister<XmmRegister>(),
3654                  Address(CpuRegister(RSP), second.GetStackIndex()));
3655       }
3656       break;
3657     }
3658 
3659     case DataType::Type::kFloat64: {
3660       if (second.IsFpuRegister()) {
3661         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3662       } else if (second.IsConstant()) {
3663         __ addsd(first.AsFpuRegister<XmmRegister>(),
3664                  codegen_->LiteralDoubleAddress(
3665                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3666       } else {
3667         DCHECK(second.IsDoubleStackSlot());
3668         __ addsd(first.AsFpuRegister<XmmRegister>(),
3669                  Address(CpuRegister(RSP), second.GetStackIndex()));
3670       }
3671       break;
3672     }
3673 
3674     default:
3675       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3676   }
3677 }
3678 
3679 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3680   LocationSummary* locations =
3681       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3682   switch (sub->GetResultType()) {
3683     case DataType::Type::kInt32: {
3684       locations->SetInAt(0, Location::RequiresRegister());
3685       locations->SetInAt(1, Location::Any());
3686       locations->SetOut(Location::SameAsFirstInput());
3687       break;
3688     }
3689     case DataType::Type::kInt64: {
3690       locations->SetInAt(0, Location::RequiresRegister());
3691       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3692       locations->SetOut(Location::SameAsFirstInput());
3693       break;
3694     }
3695     case DataType::Type::kFloat32:
3696     case DataType::Type::kFloat64: {
3697       locations->SetInAt(0, Location::RequiresFpuRegister());
3698       locations->SetInAt(1, Location::Any());
3699       locations->SetOut(Location::SameAsFirstInput());
3700       break;
3701     }
3702     default:
3703       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3704   }
3705 }
3706 
3707 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3708   LocationSummary* locations = sub->GetLocations();
3709   Location first = locations->InAt(0);
3710   Location second = locations->InAt(1);
3711   DCHECK(first.Equals(locations->Out()));
3712   switch (sub->GetResultType()) {
3713     case DataType::Type::kInt32: {
3714       if (second.IsRegister()) {
3715         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3716       } else if (second.IsConstant()) {
3717         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3718         __ subl(first.AsRegister<CpuRegister>(), imm);
3719       } else {
3720         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3721       }
3722       break;
3723     }
3724     case DataType::Type::kInt64: {
3725       if (second.IsConstant()) {
3726         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3727         DCHECK(IsInt<32>(value));
3728         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3729       } else {
3730         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3731       }
3732       break;
3733     }
3734 
3735     case DataType::Type::kFloat32: {
3736       if (second.IsFpuRegister()) {
3737         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3738       } else if (second.IsConstant()) {
3739         __ subss(first.AsFpuRegister<XmmRegister>(),
3740                  codegen_->LiteralFloatAddress(
3741                      second.GetConstant()->AsFloatConstant()->GetValue()));
3742       } else {
3743         DCHECK(second.IsStackSlot());
3744         __ subss(first.AsFpuRegister<XmmRegister>(),
3745                  Address(CpuRegister(RSP), second.GetStackIndex()));
3746       }
3747       break;
3748     }
3749 
3750     case DataType::Type::kFloat64: {
3751       if (second.IsFpuRegister()) {
3752         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3753       } else if (second.IsConstant()) {
3754         __ subsd(first.AsFpuRegister<XmmRegister>(),
3755                  codegen_->LiteralDoubleAddress(
3756                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3757       } else {
3758         DCHECK(second.IsDoubleStackSlot());
3759         __ subsd(first.AsFpuRegister<XmmRegister>(),
3760                  Address(CpuRegister(RSP), second.GetStackIndex()));
3761       }
3762       break;
3763     }
3764 
3765     default:
3766       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3767   }
3768 }
3769 
3770 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3771   LocationSummary* locations =
3772       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3773   switch (mul->GetResultType()) {
3774     case DataType::Type::kInt32: {
3775       locations->SetInAt(0, Location::RequiresRegister());
3776       locations->SetInAt(1, Location::Any());
3777       if (mul->InputAt(1)->IsIntConstant()) {
3778         // Can use 3 operand multiply.
3779         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3780       } else {
3781         locations->SetOut(Location::SameAsFirstInput());
3782       }
3783       break;
3784     }
3785     case DataType::Type::kInt64: {
3786       locations->SetInAt(0, Location::RequiresRegister());
3787       locations->SetInAt(1, Location::Any());
3788       if (mul->InputAt(1)->IsLongConstant() &&
3789           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3790         // Can use 3 operand multiply.
3791         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3792       } else {
3793         locations->SetOut(Location::SameAsFirstInput());
3794       }
3795       break;
3796     }
3797     case DataType::Type::kFloat32:
3798     case DataType::Type::kFloat64: {
3799       locations->SetInAt(0, Location::RequiresFpuRegister());
3800       locations->SetInAt(1, Location::Any());
3801       locations->SetOut(Location::SameAsFirstInput());
3802       break;
3803     }
3804 
3805     default:
3806       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3807   }
3808 }
3809 
3810 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3811   LocationSummary* locations = mul->GetLocations();
3812   Location first = locations->InAt(0);
3813   Location second = locations->InAt(1);
3814   Location out = locations->Out();
3815   switch (mul->GetResultType()) {
3816     case DataType::Type::kInt32:
3817       // The constant may have ended up in a register, so test explicitly to avoid
3818       // problems where the output may not be the same as the first operand.
3819       if (mul->InputAt(1)->IsIntConstant()) {
3820         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3821         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3822       } else if (second.IsRegister()) {
3823         DCHECK(first.Equals(out));
3824         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3825       } else {
3826         DCHECK(first.Equals(out));
3827         DCHECK(second.IsStackSlot());
3828         __ imull(first.AsRegister<CpuRegister>(),
3829                  Address(CpuRegister(RSP), second.GetStackIndex()));
3830       }
3831       break;
3832     case DataType::Type::kInt64: {
3833       // The constant may have ended up in a register, so test explicitly to avoid
3834       // problems where the output may not be the same as the first operand.
3835       if (mul->InputAt(1)->IsLongConstant()) {
3836         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3837         if (IsInt<32>(value)) {
3838           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3839                    Immediate(static_cast<int32_t>(value)));
3840         } else {
3841           // Have to use the constant area.
3842           DCHECK(first.Equals(out));
3843           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3844         }
3845       } else if (second.IsRegister()) {
3846         DCHECK(first.Equals(out));
3847         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3848       } else {
3849         DCHECK(second.IsDoubleStackSlot());
3850         DCHECK(first.Equals(out));
3851         __ imulq(first.AsRegister<CpuRegister>(),
3852                  Address(CpuRegister(RSP), second.GetStackIndex()));
3853       }
3854       break;
3855     }
3856 
3857     case DataType::Type::kFloat32: {
3858       DCHECK(first.Equals(out));
3859       if (second.IsFpuRegister()) {
3860         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3861       } else if (second.IsConstant()) {
3862         __ mulss(first.AsFpuRegister<XmmRegister>(),
3863                  codegen_->LiteralFloatAddress(
3864                      second.GetConstant()->AsFloatConstant()->GetValue()));
3865       } else {
3866         DCHECK(second.IsStackSlot());
3867         __ mulss(first.AsFpuRegister<XmmRegister>(),
3868                  Address(CpuRegister(RSP), second.GetStackIndex()));
3869       }
3870       break;
3871     }
3872 
3873     case DataType::Type::kFloat64: {
3874       DCHECK(first.Equals(out));
3875       if (second.IsFpuRegister()) {
3876         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3877       } else if (second.IsConstant()) {
3878         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3879                  codegen_->LiteralDoubleAddress(
3880                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3881       } else {
3882         DCHECK(second.IsDoubleStackSlot());
3883         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3884                  Address(CpuRegister(RSP), second.GetStackIndex()));
3885       }
3886       break;
3887     }
3888 
3889     default:
3890       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3891   }
3892 }
3893 
3894 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3895                                                      uint32_t stack_adjustment, bool is_float) {
3896   if (source.IsStackSlot()) {
3897     DCHECK(is_float);
3898     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3899   } else if (source.IsDoubleStackSlot()) {
3900     DCHECK(!is_float);
3901     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3902   } else {
3903     // Write the value to the temporary location on the stack and load to FP stack.
3904     if (is_float) {
3905       Location stack_temp = Location::StackSlot(temp_offset);
3906       codegen_->Move(stack_temp, source);
3907       __ flds(Address(CpuRegister(RSP), temp_offset));
3908     } else {
3909       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3910       codegen_->Move(stack_temp, source);
3911       __ fldl(Address(CpuRegister(RSP), temp_offset));
3912     }
3913   }
3914 }
3915 
3916 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3917   DataType::Type type = rem->GetResultType();
3918   bool is_float = type == DataType::Type::kFloat32;
3919   size_t elem_size = DataType::Size(type);
3920   LocationSummary* locations = rem->GetLocations();
3921   Location first = locations->InAt(0);
3922   Location second = locations->InAt(1);
3923   Location out = locations->Out();
3924 
3925   // Create stack space for 2 elements.
3926   // TODO: enhance register allocator to ask for stack temporaries.
3927   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3928 
3929   // Load the values to the FP stack in reverse order, using temporaries if needed.
3930   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3931   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
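  // At this point the x87 stack holds ST(0) = dividend (first) and ST(1) = divisor
  // (second), which is the operand order fprem expects: it computes the partial
  // remainder ST(0) = ST(0) rem ST(1), truncating towards zero as Java's % requires.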
3932 
3933   // Loop doing FPREM until we stabilize.
3934   NearLabel retry;
3935   __ Bind(&retry);
3936   __ fprem();
3937 
3938   // Move FP status to AX.
3939   __ fstsw();
3940 
3941   // And see if the argument reduction is complete. This is signaled by the
3942   // C2 FPU flag bit set to 0.
3943   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3944   __ j(kNotEqual, &retry);
3945 
3946   // We have settled on the final value. Retrieve it into an XMM register.
3947   // Store FP top of stack to real stack.
3948   if (is_float) {
3949     __ fsts(Address(CpuRegister(RSP), 0));
3950   } else {
3951     __ fstl(Address(CpuRegister(RSP), 0));
3952   }
3953 
3954   // Pop the 2 items from the FP stack.
3955   __ fucompp();
3956 
3957   // Load the value from the stack into an XMM register.
3958   DCHECK(out.IsFpuRegister()) << out;
3959   if (is_float) {
3960     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3961   } else {
3962     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3963   }
3964 
3965   // And remove the temporary stack space we allocated.
3966   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3967 }
3968 
3969 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3970   DCHECK(instruction->IsDiv() || instruction->IsRem());
3971 
3972   LocationSummary* locations = instruction->GetLocations();
3973   Location second = locations->InAt(1);
3974   DCHECK(second.IsConstant());
3975 
3976   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3977   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3978   int64_t imm = Int64FromConstant(second.GetConstant());
3979 
3980   DCHECK(imm == 1 || imm == -1);
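  // No division is needed for these trivial divisors: x % 1 and x % -1 are always 0,
  // x / 1 is x, and x / -1 is -x (e.g. 7 / -1 == -7, 7 % -1 == 0).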
3981 
3982   switch (instruction->GetResultType()) {
3983     case DataType::Type::kInt32: {
3984       if (instruction->IsRem()) {
3985         __ xorl(output_register, output_register);
3986       } else {
3987         __ movl(output_register, input_register);
3988         if (imm == -1) {
3989           __ negl(output_register);
3990         }
3991       }
3992       break;
3993     }
3994 
3995     case DataType::Type::kInt64: {
3996       if (instruction->IsRem()) {
3997         __ xorl(output_register, output_register);
3998       } else {
3999         __ movq(output_register, input_register);
4000         if (imm == -1) {
4001           __ negq(output_register);
4002         }
4003       }
4004       break;
4005     }
4006 
4007     default:
4008       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4009   }
4010 }
4011 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4012   LocationSummary* locations = instruction->GetLocations();
4013   Location second = locations->InAt(1);
4014   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4015   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4016   int64_t imm = Int64FromConstant(second.GetConstant());
4017   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4018   uint64_t abs_imm = AbsOrMin(imm);
4019   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
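  // Illustration of the masking trick below: for a power-of-two divisor d,
  // x % d == x & (d - 1) when x >= 0. For negative x the masked value must be
  // biased back towards zero; e.g. with d == 4 and x == -7, (-7) & 3 == 1 and
  // the fix-up turns that into 1 - 4 == -3, matching Java's -7 % 4 == -3.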
4020   if (instruction->GetResultType() == DataType::Type::kInt32) {
4021     NearLabel done;
4022     __ movl(out, numerator);
4023     __ andl(out, Immediate(abs_imm-1));
4024     __ j(Condition::kZero, &done);
4025     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4026     __ testl(numerator, numerator);
4027     __ cmov(Condition::kLess, out, tmp, false);
4028     __ Bind(&done);
4029 
4030   } else {
4031     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4032     codegen_->Load64BitValue(tmp, abs_imm - 1);
4033     NearLabel done;
4034 
4035     __ movq(out, numerator);
4036     __ andq(out, tmp);
4037     __ j(Condition::kZero, &done);
4038     __ movq(tmp, numerator);
4039     __ sarq(tmp, Immediate(63));
4040     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4041     __ orq(out, tmp);
4042     __ Bind(&done);
4043   }
4044 }
4045 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4046   LocationSummary* locations = instruction->GetLocations();
4047   Location second = locations->InAt(1);
4048 
4049   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4050   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4051 
4052   int64_t imm = Int64FromConstant(second.GetConstant());
4053   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4054   uint64_t abs_imm = AbsOrMin(imm);
4055 
4056   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4057 
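  // Signed division by a power of two must round towards zero, so negative
  // numerators are biased by (abs_imm - 1) before the arithmetic shift.
  // Worked example: for -7 / 4 the shift alone would give -7 >> 2 == -2,
  // whereas (-7 + 3) >> 2 == -1, which is the expected Java result.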
4058   if (instruction->GetResultType() == DataType::Type::kInt32) {
4059     // When the denominator is 2, we can add the sign bit to the numerator to form tmp.
4060     // Below we use an addl instruction instead of cmov, which gives us a 1 cycle benefit.
4061     if (abs_imm == 2) {
4062       __ leal(tmp, Address(numerator, 0));
4063       __ shrl(tmp, Immediate(31));
4064       __ addl(tmp, numerator);
4065     } else {
4066       __ leal(tmp, Address(numerator, abs_imm - 1));
4067       __ testl(numerator, numerator);
4068       __ cmov(kGreaterEqual, tmp, numerator);
4069     }
4070     int shift = CTZ(imm);
4071     __ sarl(tmp, Immediate(shift));
4072 
4073     if (imm < 0) {
4074       __ negl(tmp);
4075     }
4076 
4077     __ movl(output_register, tmp);
4078   } else {
4079     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4080     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4081     if (abs_imm == 2) {
4082       __ movq(rdx, numerator);
4083       __ shrq(rdx, Immediate(63));
4084       __ addq(rdx, numerator);
4085     } else {
4086       codegen_->Load64BitValue(rdx, abs_imm - 1);
4087       __ addq(rdx, numerator);
4088       __ testq(numerator, numerator);
4089       __ cmov(kGreaterEqual, rdx, numerator);
4090     }
4091     int shift = CTZ(imm);
4092     __ sarq(rdx, Immediate(shift));
4093 
4094     if (imm < 0) {
4095       __ negq(rdx);
4096     }
4097 
4098     __ movq(output_register, rdx);
4099   }
4100 }
4101 
4102 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4103   DCHECK(instruction->IsDiv() || instruction->IsRem());
4104 
4105   LocationSummary* locations = instruction->GetLocations();
4106   Location second = locations->InAt(1);
4107 
4108   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4109       : locations->GetTemp(0).AsRegister<CpuRegister>();
4110   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4111   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4112       : locations->Out().AsRegister<CpuRegister>();
4113   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4114 
4115   DCHECK_EQ(RAX, eax.AsRegister());
4116   DCHECK_EQ(RDX, edx.AsRegister());
4117   if (instruction->IsDiv()) {
4118     DCHECK_EQ(RAX, out.AsRegister());
4119   } else {
4120     DCHECK_EQ(RDX, out.AsRegister());
4121   }
4122 
4123   int64_t magic;
4124   int shift;
4125 
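  // The code below strength-reduces the constant division to a multiply using a
  // "magic number" (the Granlund/Montgomery scheme, as in Hacker's Delight):
  // CalculateMagicAndShiftForDivRem picks magic and shift so that, roughly,
  //   n / imm == high_word(magic * n) >> shift,
  // with an extra add/sub of the numerator when the magic constant's sign differs
  // from the divisor's, and a final +1 correction for negative intermediate results.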
4126   // TODO: can these branches be written as one?
4127   if (instruction->GetResultType() == DataType::Type::kInt32) {
4128     int imm = second.GetConstant()->AsIntConstant()->GetValue();
4129 
4130     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4131 
4132     __ movl(numerator, eax);
4133 
4134     __ movl(eax, Immediate(magic));
4135     __ imull(numerator);
4136 
4137     if (imm > 0 && magic < 0) {
4138       __ addl(edx, numerator);
4139     } else if (imm < 0 && magic > 0) {
4140       __ subl(edx, numerator);
4141     }
4142 
4143     if (shift != 0) {
4144       __ sarl(edx, Immediate(shift));
4145     }
4146 
4147     __ movl(eax, edx);
4148     __ shrl(edx, Immediate(31));
4149     __ addl(edx, eax);
4150 
4151     if (instruction->IsRem()) {
4152       __ movl(eax, numerator);
4153       __ imull(edx, Immediate(imm));
4154       __ subl(eax, edx);
4155       __ movl(edx, eax);
4156     } else {
4157       __ movl(eax, edx);
4158     }
4159   } else {
4160     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4161 
4162     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4163 
4164     CpuRegister rax = eax;
4165     CpuRegister rdx = edx;
4166 
4167     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4168 
4169     // Save the numerator.
4170     __ movq(numerator, rax);
4171 
4172     // RAX = magic
4173     codegen_->Load64BitValue(rax, magic);
4174 
4175     // RDX:RAX = magic * numerator
4176     __ imulq(numerator);
4177 
4178     if (imm > 0 && magic < 0) {
4179       // RDX += numerator
4180       __ addq(rdx, numerator);
4181     } else if (imm < 0 && magic > 0) {
4182       // RDX -= numerator
4183       __ subq(rdx, numerator);
4184     }
4185 
4186     // Shift if needed.
4187     if (shift != 0) {
4188       __ sarq(rdx, Immediate(shift));
4189     }
4190 
4191     // RDX += 1 if RDX < 0
4192     __ movq(rax, rdx);
4193     __ shrq(rdx, Immediate(63));
4194     __ addq(rdx, rax);
4195 
4196     if (instruction->IsRem()) {
4197       __ movq(rax, numerator);
4198 
4199       if (IsInt<32>(imm)) {
4200         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4201       } else {
4202         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4203       }
4204 
4205       __ subq(rax, rdx);
4206       __ movq(rdx, rax);
4207     } else {
4208       __ movq(rax, rdx);
4209     }
4210   }
4211 }
4212 
4213 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4214   DCHECK(instruction->IsDiv() || instruction->IsRem());
4215   DataType::Type type = instruction->GetResultType();
4216   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4217 
4218   bool is_div = instruction->IsDiv();
4219   LocationSummary* locations = instruction->GetLocations();
4220 
4221   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4222   Location second = locations->InAt(1);
4223 
4224   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4225   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4226 
4227   if (second.IsConstant()) {
4228     int64_t imm = Int64FromConstant(second.GetConstant());
4229 
4230     if (imm == 0) {
4231       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4232     } else if (imm == 1 || imm == -1) {
4233       DivRemOneOrMinusOne(instruction);
4234     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4235       if (is_div) {
4236         DivByPowerOfTwo(instruction->AsDiv());
4237       } else {
4238         RemByPowerOfTwo(instruction->AsRem());
4239       }
4240     } else {
4241       DCHECK(imm <= -2 || imm >= 2);
4242       GenerateDivRemWithAnyConstant(instruction);
4243     }
4244   } else {
4245     SlowPathCode* slow_path =
4246         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4247             instruction, out.AsRegister(), type, is_div);
4248     codegen_->AddSlowPath(slow_path);
4249 
4250     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4251     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4252     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
4253     // so it's safe to just use negl instead of more complex comparisons.
4254     if (type == DataType::Type::kInt32) {
4255       __ cmpl(second_reg, Immediate(-1));
4256       __ j(kEqual, slow_path->GetEntryLabel());
4257       // edx:eax <- sign extension of eax
4258       __ cdq();
4259       // eax = quotient, edx = remainder
4260       __ idivl(second_reg);
4261     } else {
4262       __ cmpq(second_reg, Immediate(-1));
4263       __ j(kEqual, slow_path->GetEntryLabel());
4264       // rdx:rax <- sign extension of rax
4265       __ cqo();
4266       // rax = quotient, rdx = remainder
4267       __ idivq(second_reg);
4268     }
4269     __ Bind(slow_path->GetExitLabel());
4270   }
4271 }
4272 
4273 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4274   LocationSummary* locations =
4275       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4276   switch (div->GetResultType()) {
4277     case DataType::Type::kInt32:
4278     case DataType::Type::kInt64: {
4279       locations->SetInAt(0, Location::RegisterLocation(RAX));
4280       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4281       locations->SetOut(Location::SameAsFirstInput());
4282       // Intel uses edx:eax as the dividend.
4283       locations->AddTemp(Location::RegisterLocation(RDX));
4284       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4285       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4286       // output and request another temp.
4287       if (div->InputAt(1)->IsConstant()) {
4288         locations->AddTemp(Location::RequiresRegister());
4289       }
4290       break;
4291     }
4292 
4293     case DataType::Type::kFloat32:
4294     case DataType::Type::kFloat64: {
4295       locations->SetInAt(0, Location::RequiresFpuRegister());
4296       locations->SetInAt(1, Location::Any());
4297       locations->SetOut(Location::SameAsFirstInput());
4298       break;
4299     }
4300 
4301     default:
4302       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4303   }
4304 }
4305 
4306 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4307   LocationSummary* locations = div->GetLocations();
4308   Location first = locations->InAt(0);
4309   Location second = locations->InAt(1);
4310   DCHECK(first.Equals(locations->Out()));
4311 
4312   DataType::Type type = div->GetResultType();
4313   switch (type) {
4314     case DataType::Type::kInt32:
4315     case DataType::Type::kInt64: {
4316       GenerateDivRemIntegral(div);
4317       break;
4318     }
4319 
4320     case DataType::Type::kFloat32: {
4321       if (second.IsFpuRegister()) {
4322         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4323       } else if (second.IsConstant()) {
4324         __ divss(first.AsFpuRegister<XmmRegister>(),
4325                  codegen_->LiteralFloatAddress(
4326                      second.GetConstant()->AsFloatConstant()->GetValue()));
4327       } else {
4328         DCHECK(second.IsStackSlot());
4329         __ divss(first.AsFpuRegister<XmmRegister>(),
4330                  Address(CpuRegister(RSP), second.GetStackIndex()));
4331       }
4332       break;
4333     }
4334 
4335     case DataType::Type::kFloat64: {
4336       if (second.IsFpuRegister()) {
4337         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4338       } else if (second.IsConstant()) {
4339         __ divsd(first.AsFpuRegister<XmmRegister>(),
4340                  codegen_->LiteralDoubleAddress(
4341                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4342       } else {
4343         DCHECK(second.IsDoubleStackSlot());
4344         __ divsd(first.AsFpuRegister<XmmRegister>(),
4345                  Address(CpuRegister(RSP), second.GetStackIndex()));
4346       }
4347       break;
4348     }
4349 
4350     default:
4351       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4352   }
4353 }
4354 
4355 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4356   DataType::Type type = rem->GetResultType();
4357   LocationSummary* locations =
4358     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4359 
4360   switch (type) {
4361     case DataType::Type::kInt32:
4362     case DataType::Type::kInt64: {
4363       locations->SetInAt(0, Location::RegisterLocation(RAX));
4364       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4365       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4366       locations->SetOut(Location::RegisterLocation(RDX));
4367       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4368       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4369       // output and request another temp.
4370       if (rem->InputAt(1)->IsConstant()) {
4371         locations->AddTemp(Location::RequiresRegister());
4372       }
4373       break;
4374     }
4375 
4376     case DataType::Type::kFloat32:
4377     case DataType::Type::kFloat64: {
4378       locations->SetInAt(0, Location::Any());
4379       locations->SetInAt(1, Location::Any());
4380       locations->SetOut(Location::RequiresFpuRegister());
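      // GenerateRemFP uses fstsw, which stores the x87 status word into AX, so RAX
      // is reserved as a fixed temporary here.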
4381       locations->AddTemp(Location::RegisterLocation(RAX));
4382       break;
4383     }
4384 
4385     default:
4386       LOG(FATAL) << "Unexpected rem type " << type;
4387   }
4388 }
4389 
4390 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4391   DataType::Type type = rem->GetResultType();
4392   switch (type) {
4393     case DataType::Type::kInt32:
4394     case DataType::Type::kInt64: {
4395       GenerateDivRemIntegral(rem);
4396       break;
4397     }
4398     case DataType::Type::kFloat32:
4399     case DataType::Type::kFloat64: {
4400       GenerateRemFP(rem);
4401       break;
4402     }
4403     default:
4404       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4405   }
4406 }
4407 
4408 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4409   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4410   switch (minmax->GetResultType()) {
4411     case DataType::Type::kInt32:
4412     case DataType::Type::kInt64:
4413       locations->SetInAt(0, Location::RequiresRegister());
4414       locations->SetInAt(1, Location::RequiresRegister());
4415       locations->SetOut(Location::SameAsFirstInput());
4416       break;
4417     case DataType::Type::kFloat32:
4418     case DataType::Type::kFloat64:
4419       locations->SetInAt(0, Location::RequiresFpuRegister());
4420       locations->SetInAt(1, Location::RequiresFpuRegister());
4421       // The following is sub-optimal, but it is all we can do for now. It would be fine to also
4422       // accept the second input as the output (we could simply swap the inputs).
4423       locations->SetOut(Location::SameAsFirstInput());
4424       break;
4425     default:
4426       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4427   }
4428 }
4429 
4430 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4431                                                        bool is_min,
4432                                                        DataType::Type type) {
4433   Location op1_loc = locations->InAt(0);
4434   Location op2_loc = locations->InAt(1);
4435 
4436   // Shortcut for same input locations.
4437   if (op1_loc.Equals(op2_loc)) {
4438     // Can return immediately, as op1_loc == out_loc.
4439     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4440     //       a copy here.
4441     DCHECK(locations->Out().Equals(op1_loc));
4442     return;
4443   }
4444 
4445   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4446   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4447 
4448   //  (out := op1)
4449   //  out <=? op2
4450   //  if out is min jmp done
4451   //  out := op2
4452   // done:
4453 
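  // With cmov the pseudo code above collapses to a compare plus one conditional move:
  // for min we replace out when out > op2 (kGreater), for max when out < op2 (kLess).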
4454   if (type == DataType::Type::kInt64) {
4455     __ cmpq(out, op2);
4456     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4457   } else {
4458     DCHECK_EQ(type, DataType::Type::kInt32);
4459     __ cmpl(out, op2);
4460     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4461   }
4462 }
4463 
4464 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4465                                                       bool is_min,
4466                                                       DataType::Type type) {
4467   Location op1_loc = locations->InAt(0);
4468   Location op2_loc = locations->InAt(1);
4469   Location out_loc = locations->Out();
4470   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4471 
4472   // Shortcut for same input locations.
4473   if (op1_loc.Equals(op2_loc)) {
4474     DCHECK(out_loc.Equals(op1_loc));
4475     return;
4476   }
4477 
4478   //  (out := op1)
4479   //  out <=? op2
4480   //  if Nan jmp Nan_label
4481   //  if out is min jmp done
4482   //  if op2 is min jmp op2_label
4483   //  handle -0/+0
4484   //  jmp done
4485   // Nan_label:
4486   //  out := NaN
4487   // op2_label:
4488   //  out := op2
4489   // done:
4490   //
4491   // This removes one jmp, but needs to copy one input (op1) to out.
4492   //
4493   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4494 
4495   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4496 
4497   NearLabel nan, done, op2_label;
4498   if (type == DataType::Type::kFloat64) {
4499     __ ucomisd(out, op2);
4500   } else {
4501     DCHECK_EQ(type, DataType::Type::kFloat32);
4502     __ ucomiss(out, op2);
4503   }
4504 
4505   __ j(Condition::kParityEven, &nan);
4506 
4507   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4508   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4509 
4510   // Handle 0.0/-0.0.
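  // The comparison above found the operands equal, so this path only matters for
  // +0.0 vs -0.0, where the sign bit decides: min must return -0.0, so OR-ing the
  // bit patterns keeps a set sign bit; max must return +0.0, so AND-ing clears it.
  // For equal non-zero values OR/AND of identical bit patterns is a no-op.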
4511   if (is_min) {
4512     if (type == DataType::Type::kFloat64) {
4513       __ orpd(out, op2);
4514     } else {
4515       __ orps(out, op2);
4516     }
4517   } else {
4518     if (type == DataType::Type::kFloat64) {
4519       __ andpd(out, op2);
4520     } else {
4521       __ andps(out, op2);
4522     }
4523   }
4524   __ jmp(&done);
4525 
4526   // NaN handling.
4527   __ Bind(&nan);
4528   if (type == DataType::Type::kFloat64) {
4529     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4530   } else {
4531     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4532   }
4533   __ jmp(&done);
4534 
4535   // out := op2;
4536   __ Bind(&op2_label);
4537   if (type == DataType::Type::kFloat64) {
4538     __ movsd(out, op2);
4539   } else {
4540     __ movss(out, op2);
4541   }
4542 
4543   // Done.
4544   __ Bind(&done);
4545 }
4546 
4547 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4548   DataType::Type type = minmax->GetResultType();
4549   switch (type) {
4550     case DataType::Type::kInt32:
4551     case DataType::Type::kInt64:
4552       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4553       break;
4554     case DataType::Type::kFloat32:
4555     case DataType::Type::kFloat64:
4556       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4557       break;
4558     default:
4559       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4560   }
4561 }
4562 
4563 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4564   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4565 }
4566 
4567 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4568   GenerateMinMax(min, /*is_min*/ true);
4569 }
4570 
4571 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4572   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4573 }
4574 
4575 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4576   GenerateMinMax(max, /*is_min*/ false);
4577 }
4578 
4579 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4580   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4581   switch (abs->GetResultType()) {
4582     case DataType::Type::kInt32:
4583     case DataType::Type::kInt64:
4584       locations->SetInAt(0, Location::RequiresRegister());
4585       locations->SetOut(Location::SameAsFirstInput());
4586       locations->AddTemp(Location::RequiresRegister());
4587       break;
4588     case DataType::Type::kFloat32:
4589     case DataType::Type::kFloat64:
4590       locations->SetInAt(0, Location::RequiresFpuRegister());
4591       locations->SetOut(Location::SameAsFirstInput());
4592       locations->AddTemp(Location::RequiresFpuRegister());
4593       break;
4594     default:
4595       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4596   }
4597 }
4598 
4599 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4600   LocationSummary* locations = abs->GetLocations();
4601   switch (abs->GetResultType()) {
4602     case DataType::Type::kInt32: {
4603       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4604       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
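      // Branch-free abs: mask = x >> 31 is 0 for non-negative x and -1 for negative x,
      // so (x + mask) ^ mask == x when x >= 0 and == -x otherwise
      // (e.g. x == -5: (-5 - 1) ^ -1 == -6 ^ -1 == 5).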
4605       // Create mask.
4606       __ movl(mask, out);
4607       __ sarl(mask, Immediate(31));
4608       // Add mask.
4609       __ addl(out, mask);
4610       __ xorl(out, mask);
4611       break;
4612     }
4613     case DataType::Type::kInt64: {
4614       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4615       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4616       // Create mask.
4617       __ movq(mask, out);
4618       __ sarq(mask, Immediate(63));
4619       // Add mask.
4620       __ addq(out, mask);
4621       __ xorq(out, mask);
4622       break;
4623     }
4624     case DataType::Type::kFloat32: {
4625       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4626       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
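      // For floating point, abs is just clearing the IEEE-754 sign bit; the
      // 0x7FFFFFFF mask leaves the exponent and mantissa bits untouched.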
4627       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4628       __ andps(out, mask);
4629       break;
4630     }
4631     case DataType::Type::kFloat64: {
4632       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4633       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4634       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4635       __ andpd(out, mask);
4636       break;
4637     }
4638     default:
4639       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4640   }
4641 }
4642 
4643 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4644   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4645   locations->SetInAt(0, Location::Any());
4646 }
4647 
4648 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4649   SlowPathCode* slow_path =
4650       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4651   codegen_->AddSlowPath(slow_path);
4652 
4653   LocationSummary* locations = instruction->GetLocations();
4654   Location value = locations->InAt(0);
4655 
4656   switch (instruction->GetType()) {
4657     case DataType::Type::kBool:
4658     case DataType::Type::kUint8:
4659     case DataType::Type::kInt8:
4660     case DataType::Type::kUint16:
4661     case DataType::Type::kInt16:
4662     case DataType::Type::kInt32: {
4663       if (value.IsRegister()) {
4664         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4665         __ j(kEqual, slow_path->GetEntryLabel());
4666       } else if (value.IsStackSlot()) {
4667         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4668         __ j(kEqual, slow_path->GetEntryLabel());
4669       } else {
4670         DCHECK(value.IsConstant()) << value;
4671         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4672           __ jmp(slow_path->GetEntryLabel());
4673         }
4674       }
4675       break;
4676     }
4677     case DataType::Type::kInt64: {
4678       if (value.IsRegister()) {
4679         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4680         __ j(kEqual, slow_path->GetEntryLabel());
4681       } else if (value.IsDoubleStackSlot()) {
4682         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4683         __ j(kEqual, slow_path->GetEntryLabel());
4684       } else {
4685         DCHECK(value.IsConstant()) << value;
4686         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4687           __ jmp(slow_path->GetEntryLabel());
4688         }
4689       }
4690       break;
4691     }
4692     default:
4693       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4694   }
4695 }
4696 
4697 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4698   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4699 
4700   LocationSummary* locations =
4701       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4702 
4703   switch (op->GetResultType()) {
4704     case DataType::Type::kInt32:
4705     case DataType::Type::kInt64: {
4706       locations->SetInAt(0, Location::RequiresRegister());
4707       // The shift count needs to be in CL.
4708       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4709       locations->SetOut(Location::SameAsFirstInput());
4710       break;
4711     }
4712     default:
4713       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4714   }
4715 }
4716 
4717 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4718   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4719 
4720   LocationSummary* locations = op->GetLocations();
4721   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4722   Location second = locations->InAt(1);
4723 
4724   switch (op->GetResultType()) {
4725     case DataType::Type::kInt32: {
4726       if (second.IsRegister()) {
4727         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4728         if (op->IsShl()) {
4729           __ shll(first_reg, second_reg);
4730         } else if (op->IsShr()) {
4731           __ sarl(first_reg, second_reg);
4732         } else {
4733           __ shrl(first_reg, second_reg);
4734         }
4735       } else {
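        // Java defines shift counts modulo the operand width, matching x86 semantics,
        // so the constant count is masked to 5 bits here (6 bits in the 64-bit case).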
4736         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4737         if (op->IsShl()) {
4738           __ shll(first_reg, imm);
4739         } else if (op->IsShr()) {
4740           __ sarl(first_reg, imm);
4741         } else {
4742           __ shrl(first_reg, imm);
4743         }
4744       }
4745       break;
4746     }
4747     case DataType::Type::kInt64: {
4748       if (second.IsRegister()) {
4749         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4750         if (op->IsShl()) {
4751           __ shlq(first_reg, second_reg);
4752         } else if (op->IsShr()) {
4753           __ sarq(first_reg, second_reg);
4754         } else {
4755           __ shrq(first_reg, second_reg);
4756         }
4757       } else {
4758         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4759         if (op->IsShl()) {
4760           __ shlq(first_reg, imm);
4761         } else if (op->IsShr()) {
4762           __ sarq(first_reg, imm);
4763         } else {
4764           __ shrq(first_reg, imm);
4765         }
4766       }
4767       break;
4768     }
4769     default:
4770       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4771       UNREACHABLE();
4772   }
4773 }
4774 
4775 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4776   LocationSummary* locations =
4777       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4778 
4779   switch (ror->GetResultType()) {
4780     case DataType::Type::kInt32:
4781     case DataType::Type::kInt64: {
4782       locations->SetInAt(0, Location::RequiresRegister());
4783       // The shift count needs to be in CL (unless it is a constant).
4784       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4785       locations->SetOut(Location::SameAsFirstInput());
4786       break;
4787     }
4788     default:
4789       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4790       UNREACHABLE();
4791   }
4792 }
4793 
4794 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4795   LocationSummary* locations = ror->GetLocations();
4796   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4797   Location second = locations->InAt(1);
4798 
4799   switch (ror->GetResultType()) {
4800     case DataType::Type::kInt32:
4801       if (second.IsRegister()) {
4802         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4803         __ rorl(first_reg, second_reg);
4804       } else {
4805         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4806         __ rorl(first_reg, imm);
4807       }
4808       break;
4809     case DataType::Type::kInt64:
4810       if (second.IsRegister()) {
4811         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4812         __ rorq(first_reg, second_reg);
4813       } else {
4814         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4815         __ rorq(first_reg, imm);
4816       }
4817       break;
4818     default:
4819       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4820       UNREACHABLE();
4821   }
4822 }
4823 
4824 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4825   HandleShift(shl);
4826 }
4827 
4828 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4829   HandleShift(shl);
4830 }
4831 
4832 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4833   HandleShift(shr);
4834 }
4835 
4836 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4837   HandleShift(shr);
4838 }
4839 
4840 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4841   HandleShift(ushr);
4842 }
4843 
4844 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4845   HandleShift(ushr);
4846 }
4847 
4848 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4849   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4850       instruction, LocationSummary::kCallOnMainOnly);
4851   InvokeRuntimeCallingConvention calling_convention;
4852   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4853   locations->SetOut(Location::RegisterLocation(RAX));
4854 }
4855 
4856 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4857   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4858   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4859   DCHECK(!codegen_->IsLeafMethod());
4860 }
4861 
4862 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4863   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4864       instruction, LocationSummary::kCallOnMainOnly);
4865   InvokeRuntimeCallingConvention calling_convention;
4866   locations->SetOut(Location::RegisterLocation(RAX));
4867   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4868   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4869 }
4870 
4871 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4872   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4873   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4874   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4875   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4876   DCHECK(!codegen_->IsLeafMethod());
4877 }
4878 
4879 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4880   LocationSummary* locations =
4881       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4882   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4883   if (location.IsStackSlot()) {
4884     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4885   } else if (location.IsDoubleStackSlot()) {
4886     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4887   }
4888   locations->SetOut(location);
4889 }
4890 
4891 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4892     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4893   // Nothing to do, the parameter is already at its location.
4894 }
4895 
4896 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4897   LocationSummary* locations =
4898       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4899   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4900 }
4901 
4902 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4903     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4904   // Nothing to do, the method is already at its location.
4905 }
4906 
4907 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4908   LocationSummary* locations =
4909       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4910   locations->SetInAt(0, Location::RequiresRegister());
4911   locations->SetOut(Location::RequiresRegister());
4912 }
4913 
4914 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4915   LocationSummary* locations = instruction->GetLocations();
4916   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4917     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4918         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4919     __ movq(locations->Out().AsRegister<CpuRegister>(),
4920             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4921   } else {
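    // Interface method table (IMT) lookups need two loads: first the ImTable pointer
    // stored in the class object, then the method entry at the given index in that table.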
4922     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4923         instruction->GetIndex(), kX86_64PointerSize));
4924     __ movq(locations->Out().AsRegister<CpuRegister>(),
4925             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4926             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4927     __ movq(locations->Out().AsRegister<CpuRegister>(),
4928             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4929   }
4930 }
4931 
4932 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4933   LocationSummary* locations =
4934       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4935   locations->SetInAt(0, Location::RequiresRegister());
4936   locations->SetOut(Location::SameAsFirstInput());
4937 }
4938 
4939 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4940   LocationSummary* locations = not_->GetLocations();
4941   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4942             locations->Out().AsRegister<CpuRegister>().AsRegister());
4943   Location out = locations->Out();
4944   switch (not_->GetResultType()) {
4945     case DataType::Type::kInt32:
4946       __ notl(out.AsRegister<CpuRegister>());
4947       break;
4948 
4949     case DataType::Type::kInt64:
4950       __ notq(out.AsRegister<CpuRegister>());
4951       break;
4952 
4953     default:
4954       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4955   }
4956 }
4957 
4958 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4959   LocationSummary* locations =
4960       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4961   locations->SetInAt(0, Location::RequiresRegister());
4962   locations->SetOut(Location::SameAsFirstInput());
4963 }
4964 
4965 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4966   LocationSummary* locations = bool_not->GetLocations();
4967   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4968             locations->Out().AsRegister<CpuRegister>().AsRegister());
4969   Location out = locations->Out();
4970   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4971 }
4972 
4973 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4974   LocationSummary* locations =
4975       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4976   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4977     locations->SetInAt(i, Location::Any());
4978   }
4979   locations->SetOut(Location::Any());
4980 }
4981 
4982 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4983   LOG(FATAL) << "Unimplemented";
4984 }
4985 
4986 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4987   /*
4988    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4989    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4990    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4991    */
4992   switch (kind) {
4993     case MemBarrierKind::kAnyAny: {
4994       MemoryFence();
4995       break;
4996     }
4997     case MemBarrierKind::kAnyStore:
4998     case MemBarrierKind::kLoadAny:
4999     case MemBarrierKind::kStoreStore: {
5000       // nop
5001       break;
5002     }
5003     case MemBarrierKind::kNTStoreStore:
5004       // Non-Temporal Store/Store needs an explicit fence.
5005       MemoryFence(/* non-temporal= */ true);
5006       break;
5007   }
5008 }
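// For reference, a sketch (not emitted here) of how the field accessors below combine these
// barriers for volatile fields; which instruction MemoryFence() expands to is left to that helper:
//   volatile load:   the load, then kLoadAny                (a no-op barrier on x86-64)
//   volatile store:  kAnyStore (no-op), the store, then kAnyAny (the only case emitting a real fence)
// See HandleFieldGet and HandleFieldSet further down in this file.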
5009 
5010 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5011   DCHECK(instruction->IsInstanceFieldGet() ||
5012          instruction->IsStaticFieldGet() ||
5013          instruction->IsPredicatedInstanceFieldGet());
5014 
5015   bool object_field_get_with_read_barrier =
5016       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5017   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5018   LocationSummary* locations =
5019       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5020                                                        object_field_get_with_read_barrier
5021                                                            ? LocationSummary::kCallOnSlowPath
5022                                                            : LocationSummary::kNoCall);
5023   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5024     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5025   }
5026   // receiver_input
5027   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5028   if (is_predicated) {
5029     if (DataType::IsFloatingPointType(instruction->GetType())) {
5030       locations->SetInAt(0, Location::RequiresFpuRegister());
5031     } else {
5032       locations->SetInAt(0, Location::RequiresRegister());
5033     }
5034   }
5035   if (DataType::IsFloatingPointType(instruction->GetType())) {
5036     locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5037                                     : Location::RequiresFpuRegister());
5038   } else {
5039     // The output overlaps for an object field get when read barriers are
5040     // enabled: we do not want the move to overwrite the object's location, as
5041     // we need it to emit the read barrier. For predicated instructions we can
5042     // always overlap since the output is SameAsFirstInput and already holds the default value.
5043     locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5044                       object_field_get_with_read_barrier || is_predicated
5045                           ? Location::kOutputOverlap
5046                           : Location::kNoOutputOverlap);
5047   }
5048 }
5049 
5050 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5051                                                     const FieldInfo& field_info) {
5052   DCHECK(instruction->IsInstanceFieldGet() ||
5053          instruction->IsStaticFieldGet() ||
5054          instruction->IsPredicatedInstanceFieldGet());
5055 
5056   LocationSummary* locations = instruction->GetLocations();
5057   Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5058   CpuRegister base = base_loc.AsRegister<CpuRegister>();
5059   Location out = locations->Out();
5060   bool is_volatile = field_info.IsVolatile();
5061   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5062   DataType::Type load_type = instruction->GetType();
5063   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5064 
5065   if (load_type == DataType::Type::kReference) {
5066     // /* HeapReference<Object> */ out = *(base + offset)
5067     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5068       // Note that a potential implicit null check is handled in this
5069       // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5070       codegen_->GenerateFieldLoadWithBakerReadBarrier(
5071           instruction, out, base, offset, /* needs_null_check= */ true);
5072       if (is_volatile) {
5073         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5074       }
5075     } else {
5076       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5077       codegen_->MaybeRecordImplicitNullCheck(instruction);
5078       if (is_volatile) {
5079         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5080       }
5081       // If read barriers are enabled, emit read barriers other than
5082       // Baker's using a slow path (and also unpoison the loaded
5083       // reference, if heap poisoning is enabled).
5084       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5085     }
5086   } else {
5087     codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5088     codegen_->MaybeRecordImplicitNullCheck(instruction);
5089     if (is_volatile) {
5090       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5091     }
5092   }
5093 }
5094 
5095 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5096                                             const FieldInfo& field_info) {
5097   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5098 
5099   LocationSummary* locations =
5100       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5101   DataType::Type field_type = field_info.GetFieldType();
5102   bool is_volatile = field_info.IsVolatile();
5103   bool needs_write_barrier =
5104       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5105 
5106   locations->SetInAt(0, Location::RequiresRegister());
5107   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5108     if (is_volatile) {
5109       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5110       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5111     } else {
5112       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5113     }
5114   } else {
5115     if (is_volatile) {
5116       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5117       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5118     } else {
5119       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5120     }
5121   }
5122   if (needs_write_barrier) {
5123     // Temporary registers for the write barrier.
5124     locations->AddTemp(Location::RequiresRegister());
5125     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
5126   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5127     // Temporary register for the reference poisoning.
5128     locations->AddTemp(Location::RequiresRegister());
5129   }
5130 }
5131 
5132 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5133                                            DataType::Type type,
5134                                            CpuRegister* temp) {
5135   switch (type) {
5136     case DataType::Type::kInt16:
5137       // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5138       __ bswapl(value.AsRegister<CpuRegister>());
5139       __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5140       break;
5141     case DataType::Type::kUint16:
5142       // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5143       __ bswapl(value.AsRegister<CpuRegister>());
5144       __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5145       break;
5146     case DataType::Type::kInt32:
5147     case DataType::Type::kUint32:
5148       __ bswapl(value.AsRegister<CpuRegister>());
5149       break;
5150     case DataType::Type::kInt64:
5151     case DataType::Type::kUint64:
5152       __ bswapq(value.AsRegister<CpuRegister>());
5153       break;
5154     case DataType::Type::kFloat32: {
5155       DCHECK_NE(temp, nullptr);
5156       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5157       __ bswapl(*temp);
5158       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5159       break;
5160     }
5161     case DataType::Type::kFloat64: {
5162       DCHECK_NE(temp, nullptr);
5163       __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5164       __ bswapq(*temp);
5165       __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5166       break;
5167     }
5168     default:
5169       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5170       UNREACHABLE();
5171   }
5172 }
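// Worked example of the 16-bit cases above (values are illustrative, not from the source):
// for kInt16, an input of 0xFF80 (-128) sits sign-extended in the register as 0xFFFFFF80;
// bswapl yields 0x80FFFFFF and sarl $16 yields 0xFFFF80FF, i.e. the byte-swapped value
// 0x80FF correctly sign-extended. For kUint16, 0x00001234 becomes 0x34120000 after bswapl
// and 0x00003412 after shrl $16, i.e. zero-extended.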
5173 
5174 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5175                                                     uint32_t value_index,
5176                                                     uint32_t extra_temp_index,
5177                                                     DataType::Type field_type,
5178                                                     Address field_addr,
5179                                                     CpuRegister base,
5180                                                     bool is_volatile,
5181                                                     bool is_atomic,
5182                                                     bool value_can_be_null,
5183                                                     bool byte_swap) {
5184   LocationSummary* locations = instruction->GetLocations();
5185   Location value = locations->InAt(value_index);
5186 
5187   if (is_volatile) {
5188     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5189   }
5190 
5191   bool maybe_record_implicit_null_check_done = false;
5192 
5193   if (value.IsConstant()) {
5194     switch (field_type) {
5195       case DataType::Type::kBool:
5196       case DataType::Type::kUint8:
5197       case DataType::Type::kInt8:
5198         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5199         break;
5200       case DataType::Type::kUint16:
5201       case DataType::Type::kInt16: {
5202         int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5203         if (byte_swap) {
5204           v = BSWAP(v);
5205         }
5206         __ movw(field_addr, Immediate(v));
5207         break;
5208       }
5209       case DataType::Type::kUint32:
5210       case DataType::Type::kInt32:
5211       case DataType::Type::kFloat32:
5212       case DataType::Type::kReference: {
5213         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5214         if (byte_swap) {
5215           v = BSWAP(v);
5216         }
5217         DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5218         // Note: if heap poisoning is enabled, no need to poison
5219         // (negate) `v` if it is a reference, as it would be null.
5220         __ movl(field_addr, Immediate(v));
5221         break;
5222       }
5223       case DataType::Type::kUint64:
5224       case DataType::Type::kInt64:
5225       case DataType::Type::kFloat64: {
5226         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5227         if (byte_swap) {
5228           v = BSWAP(v);
5229         }
5230         if (is_atomic) {
5231           // Move constant into a register, then atomically store the register to memory.
5232           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5233           __ movq(temp, Immediate(v));
5234           __ movq(field_addr, temp);
5235         } else {
5236           Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5237           codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5238         }
5239         maybe_record_implicit_null_check_done = true;
5240         break;
5241       }
5242       case DataType::Type::kVoid:
5243         LOG(FATAL) << "Unreachable type " << field_type;
5244         UNREACHABLE();
5245     }
5246   } else {
5247     if (byte_swap) {
5248       // Swap byte order in-place in the input register (we will restore it later).
5249       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5250       Bswap(value, field_type, &temp);
5251     }
5252 
5253     switch (field_type) {
5254       case DataType::Type::kBool:
5255       case DataType::Type::kUint8:
5256       case DataType::Type::kInt8:
5257         __ movb(field_addr, value.AsRegister<CpuRegister>());
5258         break;
5259       case DataType::Type::kUint16:
5260       case DataType::Type::kInt16:
5261         __ movw(field_addr, value.AsRegister<CpuRegister>());
5262         break;
5263       case DataType::Type::kUint32:
5264       case DataType::Type::kInt32:
5265       case DataType::Type::kReference:
5266         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5267           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5268           __ movl(temp, value.AsRegister<CpuRegister>());
5269           __ PoisonHeapReference(temp);
5270           __ movl(field_addr, temp);
5271         } else {
5272           __ movl(field_addr, value.AsRegister<CpuRegister>());
5273         }
5274         break;
5275       case DataType::Type::kUint64:
5276       case DataType::Type::kInt64:
5277         __ movq(field_addr, value.AsRegister<CpuRegister>());
5278         break;
5279       case DataType::Type::kFloat32:
5280         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5281         break;
5282       case DataType::Type::kFloat64:
5283         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5284         break;
5285       case DataType::Type::kVoid:
5286         LOG(FATAL) << "Unreachable type " << field_type;
5287         UNREACHABLE();
5288     }
5289 
5290     if (byte_swap) {
5291       // Restore byte order.
5292       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5293       Bswap(value, field_type, &temp);
5294     }
5295   }
5296 
5297   if (!maybe_record_implicit_null_check_done) {
5298     codegen_->MaybeRecordImplicitNullCheck(instruction);
5299   }
5300 
5301   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
5302     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5303     CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5304     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
5305   }
5306 
5307   if (is_volatile) {
5308     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5309   }
5310 }
5311 
5312 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5313                                                     const FieldInfo& field_info,
5314                                                     bool value_can_be_null) {
5315   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5316 
5317   LocationSummary* locations = instruction->GetLocations();
5318   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5319   bool is_volatile = field_info.IsVolatile();
5320   DataType::Type field_type = field_info.GetFieldType();
5321   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5322   bool is_predicated =
5323       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5324 
5325   NearLabel pred_is_null;
5326   if (is_predicated) {
5327     __ testl(base, base);
5328     __ j(kZero, &pred_is_null);
5329   }
5330 
5331   HandleFieldSet(instruction,
5332                  /*value_index=*/ 1,
5333                  /*extra_temp_index=*/ 1,
5334                  field_type,
5335                  Address(base, offset),
5336                  base,
5337                  is_volatile,
5338                  /*is_atomic=*/ false,
5339                  value_can_be_null);
5340 
5341   if (is_predicated) {
5342     __ Bind(&pred_is_null);
5343   }
5344 }
5345 
5346 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5347   HandleFieldSet(instruction, instruction->GetFieldInfo());
5348 }
5349 
5350 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5351   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5352 }
5353 
5354 void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
5355     HPredicatedInstanceFieldGet* instruction) {
5356   HandleFieldGet(instruction);
5357 }
5358 
5359 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5360   HandleFieldGet(instruction);
5361 }
5362 
5363 void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
5364     HPredicatedInstanceFieldGet* instruction) {
5365   NearLabel finish;
5366   LocationSummary* locations = instruction->GetLocations();
5367   CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
5368   __ testl(target, target);
5369   __ j(kZero, &finish);
5370   HandleFieldGet(instruction, instruction->GetFieldInfo());
5371   __ Bind(&finish);
5372 }
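// Roughly, the emitted shape for a predicated get is (illustrative):
//   testl target, target
//   jz    finish        // null receiver: the output already holds the default value,
//                       // since it was allocated SameAsFirstInput above.
//   <the regular field load>
// finish: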
5373 
5374 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5375   HandleFieldGet(instruction, instruction->GetFieldInfo());
5376 }
5377 
5378 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5379   HandleFieldGet(instruction);
5380 }
5381 
5382 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5383   HandleFieldGet(instruction, instruction->GetFieldInfo());
5384 }
5385 
5386 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5387   HandleFieldSet(instruction, instruction->GetFieldInfo());
5388 }
5389 
5390 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5391   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5392 }
5393 
5394 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5395   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5396 }
5397 
5398 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5399   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5400   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5401 }
5402 
5403 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5404     HUnresolvedInstanceFieldGet* instruction) {
5405   FieldAccessCallingConventionX86_64 calling_convention;
5406   codegen_->CreateUnresolvedFieldLocationSummary(
5407       instruction, instruction->GetFieldType(), calling_convention);
5408 }
5409 
5410 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5411     HUnresolvedInstanceFieldGet* instruction) {
5412   FieldAccessCallingConventionX86_64 calling_convention;
5413   codegen_->GenerateUnresolvedFieldAccess(instruction,
5414                                           instruction->GetFieldType(),
5415                                           instruction->GetFieldIndex(),
5416                                           instruction->GetDexPc(),
5417                                           calling_convention);
5418 }
5419 
5420 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5421     HUnresolvedInstanceFieldSet* instruction) {
5422   FieldAccessCallingConventionX86_64 calling_convention;
5423   codegen_->CreateUnresolvedFieldLocationSummary(
5424       instruction, instruction->GetFieldType(), calling_convention);
5425 }
5426 
5427 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5428     HUnresolvedInstanceFieldSet* instruction) {
5429   FieldAccessCallingConventionX86_64 calling_convention;
5430   codegen_->GenerateUnresolvedFieldAccess(instruction,
5431                                           instruction->GetFieldType(),
5432                                           instruction->GetFieldIndex(),
5433                                           instruction->GetDexPc(),
5434                                           calling_convention);
5435 }
5436 
5437 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5438     HUnresolvedStaticFieldGet* instruction) {
5439   FieldAccessCallingConventionX86_64 calling_convention;
5440   codegen_->CreateUnresolvedFieldLocationSummary(
5441       instruction, instruction->GetFieldType(), calling_convention);
5442 }
5443 
5444 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5445     HUnresolvedStaticFieldGet* instruction) {
5446   FieldAccessCallingConventionX86_64 calling_convention;
5447   codegen_->GenerateUnresolvedFieldAccess(instruction,
5448                                           instruction->GetFieldType(),
5449                                           instruction->GetFieldIndex(),
5450                                           instruction->GetDexPc(),
5451                                           calling_convention);
5452 }
5453 
5454 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5455     HUnresolvedStaticFieldSet* instruction) {
5456   FieldAccessCallingConventionX86_64 calling_convention;
5457   codegen_->CreateUnresolvedFieldLocationSummary(
5458       instruction, instruction->GetFieldType(), calling_convention);
5459 }
5460 
5461 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5462     HUnresolvedStaticFieldSet* instruction) {
5463   FieldAccessCallingConventionX86_64 calling_convention;
5464   codegen_->GenerateUnresolvedFieldAccess(instruction,
5465                                           instruction->GetFieldType(),
5466                                           instruction->GetFieldIndex(),
5467                                           instruction->GetDexPc(),
5468                                           calling_convention);
5469 }
5470 
5471 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5472   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5473   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5474       ? Location::RequiresRegister()
5475       : Location::Any();
5476   locations->SetInAt(0, loc);
5477 }
5478 
5479 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5480   if (CanMoveNullCheckToUser(instruction)) {
5481     return;
5482   }
5483   LocationSummary* locations = instruction->GetLocations();
5484   Location obj = locations->InAt(0);
5485 
5486   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5487   RecordPcInfo(instruction, instruction->GetDexPc());
5488 }
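// The testl above performs a dummy load from the first word of the object. If `obj` is null,
// the access faults and the runtime's fault handler is expected to turn the fault into a
// NullPointerException using the stack map recorded by RecordPcInfo; that machinery lives
// outside this file.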
5489 
5490 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5491   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5492   AddSlowPath(slow_path);
5493 
5494   LocationSummary* locations = instruction->GetLocations();
5495   Location obj = locations->InAt(0);
5496 
5497   if (obj.IsRegister()) {
5498     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5499   } else if (obj.IsStackSlot()) {
5500     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5501   } else {
5502     DCHECK(obj.IsConstant()) << obj;
5503     DCHECK(obj.GetConstant()->IsNullConstant());
5504     __ jmp(slow_path->GetEntryLabel());
5505     return;
5506   }
5507   __ j(kEqual, slow_path->GetEntryLabel());
5508 }
5509 
5510 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5511   codegen_->GenerateNullCheck(instruction);
5512 }
5513 
5514 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5515   bool object_array_get_with_read_barrier =
5516       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5517   LocationSummary* locations =
5518       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5519                                                        object_array_get_with_read_barrier
5520                                                            ? LocationSummary::kCallOnSlowPath
5521                                                            : LocationSummary::kNoCall);
5522   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5523     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5524   }
5525   locations->SetInAt(0, Location::RequiresRegister());
5526   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5527   if (DataType::IsFloatingPointType(instruction->GetType())) {
5528     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5529   } else {
5530     // The output overlaps for an object array get when read barriers
5531     // are enabled: we do not want the move to overwrite the array's
5532     // location, as we need it to emit the read barrier.
5533     locations->SetOut(
5534         Location::RequiresRegister(),
5535         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5536   }
5537 }
5538 
5539 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5540   LocationSummary* locations = instruction->GetLocations();
5541   Location obj_loc = locations->InAt(0);
5542   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5543   Location index = locations->InAt(1);
5544   Location out_loc = locations->Out();
5545   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5546 
5547   DataType::Type type = instruction->GetType();
5548   if (type == DataType::Type::kReference) {
5549     static_assert(
5550         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5551         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5552     // /* HeapReference<Object> */ out =
5553     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5554     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5555       // Note that a potential implicit null check is handled in this
5556       // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5557       codegen_->GenerateArrayLoadWithBakerReadBarrier(
5558           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5559     } else {
5560       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5561       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5562       codegen_->MaybeRecordImplicitNullCheck(instruction);
5563       // If read barriers are enabled, emit read barriers other than
5564       // Baker's using a slow path (and also unpoison the loaded
5565       // reference, if heap poisoning is enabled).
5566       if (index.IsConstant()) {
5567         uint32_t offset =
5568             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5569         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5570       } else {
5571         codegen_->MaybeGenerateReadBarrierSlow(
5572             instruction, out_loc, out_loc, obj_loc, data_offset, index);
5573       }
5574     }
5575   } else {
5576     if (type == DataType::Type::kUint16
5577         && mirror::kUseStringCompression
5578         && instruction->IsStringCharAt()) {
5579       // Branch into the compressed and uncompressed cases, loading with the element size appropriate to each.
5580       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5581       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5582       NearLabel done, not_compressed;
5583       __ testb(Address(obj, count_offset), Immediate(1));
5584       codegen_->MaybeRecordImplicitNullCheck(instruction);
5585       static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5586                     "Expecting 0=compressed, 1=uncompressed");
5587       __ j(kNotZero, &not_compressed);
5588       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5589       __ jmp(&done);
5590       __ Bind(&not_compressed);
5591       __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5592       __ Bind(&done);
5593     } else {
5594       ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5595       Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5596       codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5597     }
5598     codegen_->MaybeRecordImplicitNullCheck(instruction);
5599   }
5600 }
5601 
5602 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5603   DataType::Type value_type = instruction->GetComponentType();
5604 
5605   bool needs_write_barrier =
5606       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5607   bool needs_type_check = instruction->NeedsTypeCheck();
5608 
5609   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5610       instruction,
5611       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5612 
5613   locations->SetInAt(0, Location::RequiresRegister());
5614   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5615   if (DataType::IsFloatingPointType(value_type)) {
5616     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5617   } else {
5618     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5619   }
5620 
5621   if (needs_write_barrier) {
5622     // Temporary registers for the write barrier.
5623     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5624     locations->AddTemp(Location::RequiresRegister());
5625   }
5626 }
5627 
5628 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5629   LocationSummary* locations = instruction->GetLocations();
5630   Location array_loc = locations->InAt(0);
5631   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5632   Location index = locations->InAt(1);
5633   Location value = locations->InAt(2);
5634   DataType::Type value_type = instruction->GetComponentType();
5635   bool needs_type_check = instruction->NeedsTypeCheck();
5636   bool needs_write_barrier =
5637       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5638 
5639   switch (value_type) {
5640     case DataType::Type::kBool:
5641     case DataType::Type::kUint8:
5642     case DataType::Type::kInt8: {
5643       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5644       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5645       if (value.IsRegister()) {
5646         __ movb(address, value.AsRegister<CpuRegister>());
5647       } else {
5648         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5649       }
5650       codegen_->MaybeRecordImplicitNullCheck(instruction);
5651       break;
5652     }
5653 
5654     case DataType::Type::kUint16:
5655     case DataType::Type::kInt16: {
5656       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5657       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5658       if (value.IsRegister()) {
5659         __ movw(address, value.AsRegister<CpuRegister>());
5660       } else {
5661         DCHECK(value.IsConstant()) << value;
5662         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5663       }
5664       codegen_->MaybeRecordImplicitNullCheck(instruction);
5665       break;
5666     }
5667 
5668     case DataType::Type::kReference: {
5669       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5670       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5671 
5672       if (!value.IsRegister()) {
5673         // Just setting null.
5674         DCHECK(instruction->InputAt(2)->IsNullConstant());
5675         DCHECK(value.IsConstant()) << value;
5676         __ movl(address, Immediate(0));
5677         codegen_->MaybeRecordImplicitNullCheck(instruction);
5678         DCHECK(!needs_write_barrier);
5679         DCHECK(!needs_type_check);
5680         break;
5681       }
5682 
5683       DCHECK(needs_write_barrier);
5684       CpuRegister register_value = value.AsRegister<CpuRegister>();
5685       Location temp_loc = locations->GetTemp(0);
5686       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5687 
5688       bool can_value_be_null = instruction->GetValueCanBeNull();
5689       NearLabel do_store;
5690       if (can_value_be_null) {
5691         __ testl(register_value, register_value);
5692         __ j(kEqual, &do_store);
5693       }
5694 
5695       SlowPathCode* slow_path = nullptr;
5696       if (needs_type_check) {
5697         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5698         codegen_->AddSlowPath(slow_path);
5699 
5700         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5701         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5702         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5703 
5704         // Note that when Baker read barriers are enabled, the type
5705         // checks are performed without read barriers.  This is fine,
5706         // even in the case where a class object is in the from-space
5707         // after the flip, as a comparison involving such a type would
5708         // not produce a false positive; it may of course produce a
5709         // false negative, in which case we would take the ArraySet
5710         // slow path.
5711 
5712         // /* HeapReference<Class> */ temp = array->klass_
5713         __ movl(temp, Address(array, class_offset));
5714         codegen_->MaybeRecordImplicitNullCheck(instruction);
5715         __ MaybeUnpoisonHeapReference(temp);
5716 
5717         // /* HeapReference<Class> */ temp = temp->component_type_
5718         __ movl(temp, Address(temp, component_offset));
5719         // If heap poisoning is enabled, no need to unpoison `temp`
5720         // nor the object reference in `register_value->klass`, as
5721         // we are comparing two poisoned references.
5722         __ cmpl(temp, Address(register_value, class_offset));
5723 
5724         if (instruction->StaticTypeOfArrayIsObjectArray()) {
5725           NearLabel do_put;
5726           __ j(kEqual, &do_put);
5727           // If heap poisoning is enabled, the `temp` reference has
5728           // not been unpoisoned yet; unpoison it now.
5729           __ MaybeUnpoisonHeapReference(temp);
5730 
5731           // If heap poisoning is enabled, no need to unpoison the
5732           // heap reference loaded below, as it is only used for a
5733           // comparison with null.
5734           __ cmpl(Address(temp, super_offset), Immediate(0));
5735           __ j(kNotEqual, slow_path->GetEntryLabel());
5736           __ Bind(&do_put);
5737         } else {
5738           __ j(kNotEqual, slow_path->GetEntryLabel());
5739         }
5740       }
5741 
5742       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5743       codegen_->MarkGCCard(
5744           temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5745 
5746       if (can_value_be_null) {
5747         DCHECK(do_store.IsLinked());
5748         __ Bind(&do_store);
5749       }
5750 
5751       Location source = value;
5752       if (kPoisonHeapReferences) {
5753         __ movl(temp, register_value);
5754         __ PoisonHeapReference(temp);
5755         source = temp_loc;
5756       }
5757 
5758       __ movl(address, source.AsRegister<CpuRegister>());
5759 
5760       if (can_value_be_null || !needs_type_check) {
5761         codegen_->MaybeRecordImplicitNullCheck(instruction);
5762       }
5763 
5764       if (slow_path != nullptr) {
5765         __ Bind(slow_path->GetExitLabel());
5766       }
5767 
5768       break;
5769     }
5770 
5771     case DataType::Type::kInt32: {
5772       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5773       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5774       if (value.IsRegister()) {
5775         __ movl(address, value.AsRegister<CpuRegister>());
5776       } else {
5777         DCHECK(value.IsConstant()) << value;
5778         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5779         __ movl(address, Immediate(v));
5780       }
5781       codegen_->MaybeRecordImplicitNullCheck(instruction);
5782       break;
5783     }
5784 
5785     case DataType::Type::kInt64: {
5786       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5787       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5788       if (value.IsRegister()) {
5789         __ movq(address, value.AsRegister<CpuRegister>());
5790         codegen_->MaybeRecordImplicitNullCheck(instruction);
5791       } else {
5792         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5793         Address address_high =
5794             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5795         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5796       }
5797       break;
5798     }
5799 
5800     case DataType::Type::kFloat32: {
5801       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5802       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5803       if (value.IsFpuRegister()) {
5804         __ movss(address, value.AsFpuRegister<XmmRegister>());
5805       } else {
5806         DCHECK(value.IsConstant());
5807         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5808         __ movl(address, Immediate(v));
5809       }
5810       codegen_->MaybeRecordImplicitNullCheck(instruction);
5811       break;
5812     }
5813 
5814     case DataType::Type::kFloat64: {
5815       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5816       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5817       if (value.IsFpuRegister()) {
5818         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5819         codegen_->MaybeRecordImplicitNullCheck(instruction);
5820       } else {
5821         int64_t v =
5822             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5823         Address address_high =
5824             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5825         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5826       }
5827       break;
5828     }
5829 
5830     case DataType::Type::kUint32:
5831     case DataType::Type::kUint64:
5832     case DataType::Type::kVoid:
5833       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5834       UNREACHABLE();
5835   }
5836 }
5837 
5838 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5839   LocationSummary* locations =
5840       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5841   locations->SetInAt(0, Location::RequiresRegister());
5842   if (!instruction->IsEmittedAtUseSite()) {
5843     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5844   }
5845 }
5846 
5847 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5848   if (instruction->IsEmittedAtUseSite()) {
5849     return;
5850   }
5851 
5852   LocationSummary* locations = instruction->GetLocations();
5853   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5854   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5855   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5856   __ movl(out, Address(obj, offset));
5857   codegen_->MaybeRecordImplicitNullCheck(instruction);
5858   // Shift out the compression flag (least significant bit) in case the array is a String's char array.
5859   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5860     __ shrl(out, Immediate(1));
5861   }
5862 }
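// Together with the testb/movzxb/movzxw sequence in VisitArrayGet above, the shrl here
// implies the compressed-string encoding of the count field: count == (length << 1) | flag,
// where flag == 0 means compressed (8-bit chars). Illustrative example (not from the source):
// an uncompressed string of length 3 stores count == 7; shrl $1 recovers 3, and the set low
// bit routes String.charAt to the 16-bit (movzxw) load.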
5863 
5864 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5865   RegisterSet caller_saves = RegisterSet::Empty();
5866   InvokeRuntimeCallingConvention calling_convention;
5867   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5868   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5869   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5870   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5871   HInstruction* length = instruction->InputAt(1);
5872   if (!length->IsEmittedAtUseSite()) {
5873     locations->SetInAt(1, Location::RegisterOrConstant(length));
5874   }
5875 }
5876 
5877 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5878   LocationSummary* locations = instruction->GetLocations();
5879   Location index_loc = locations->InAt(0);
5880   Location length_loc = locations->InAt(1);
5881   SlowPathCode* slow_path =
5882       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5883 
5884   if (length_loc.IsConstant()) {
5885     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5886     if (index_loc.IsConstant()) {
5887       // BCE will remove the bounds check if we are guaranteed to pass.
5888       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5889       if (index < 0 || index >= length) {
5890         codegen_->AddSlowPath(slow_path);
5891         __ jmp(slow_path->GetEntryLabel());
5892       } else {
5893         // Some optimization after BCE may have generated this, and we should not
5894         // generate a bounds check if it is a valid range.
5895       }
5896       return;
5897     }
5898 
5899     // We have to reverse the jump condition because the length is the constant.
5900     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5901     __ cmpl(index_reg, Immediate(length));
5902     codegen_->AddSlowPath(slow_path);
5903     __ j(kAboveEqual, slow_path->GetEntryLabel());
5904   } else {
5905     HInstruction* array_length = instruction->InputAt(1);
5906     if (array_length->IsEmittedAtUseSite()) {
5907       // Address the length field in the array.
5908       DCHECK(array_length->IsArrayLength());
5909       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5910       Location array_loc = array_length->GetLocations()->InAt(0);
5911       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5912       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5913         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5914         // the string compression flag) with the in-memory length and avoid the temporary.
5915         CpuRegister length_reg = CpuRegister(TMP);
5916         __ movl(length_reg, array_len);
5917         codegen_->MaybeRecordImplicitNullCheck(array_length);
5918         __ shrl(length_reg, Immediate(1));
5919         codegen_->GenerateIntCompare(length_reg, index_loc);
5920       } else {
5921         // Checking the bound for the general case:
5922         // an array of chars, or a String's char array when the compression feature is off.
5923         if (index_loc.IsConstant()) {
5924           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5925           __ cmpl(array_len, Immediate(value));
5926         } else {
5927           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5928         }
5929         codegen_->MaybeRecordImplicitNullCheck(array_length);
5930       }
5931     } else {
5932       codegen_->GenerateIntCompare(length_loc, index_loc);
5933     }
5934     codegen_->AddSlowPath(slow_path);
5935     __ j(kBelowEqual, slow_path->GetEntryLabel());
5936   }
5937 }
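// Both comparisons above use unsigned conditions, so a negative index (which reads as a huge
// unsigned value) always takes the slow path. The two shapes, informally:
//   - constant length:           compare the index against the length immediate and branch on
//                                kAboveEqual (index >= length, unsigned);
//   - length in register/memory: compare the length against the index and branch on
//                                kBelowEqual (length <= index, unsigned).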
5938 
5939 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5940                                      CpuRegister card,
5941                                      CpuRegister object,
5942                                      CpuRegister value,
5943                                      bool value_can_be_null) {
5944   NearLabel is_null;
5945   if (value_can_be_null) {
5946     __ testl(value, value);
5947     __ j(kEqual, &is_null);
5948   }
5949   // Load the address of the card table into `card`.
5950   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5951                                         /* no_rip= */ true));
5952   // Calculate the offset (in the card table) of the card corresponding to
5953   // `object`.
5954   __ movq(temp, object);
5955   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5956   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5957   // `object`'s card.
5958   //
5959   // Register `card` contains the address of the card table. Note that the card
5960   // table's base is biased during its creation so that it always starts at an
5961   // address whose least-significant byte is equal to `kCardDirty` (see
5962   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5963   // below writes the `kCardDirty` (byte) value into the `object`'s card
5964   // (located at `card + object >> kCardShift`).
5965   //
5966   // This dual use of the value in register `card` (1. to calculate the location
5967   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5968   // (no need to explicitly load `kCardDirty` as an immediate value).
5969   __ movb(Address(temp, card, TIMES_1, 0), card);
5970   if (value_can_be_null) {
5971     __ Bind(&is_null);
5972   }
5973 }
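// Illustrative arithmetic for the sequence above, assuming kCardShift is 10 (1 KiB cards;
// the actual value is defined in gc/accounting/card_table.h, not here): for an object at
// 0x12345678, `temp` becomes 0x12345678 >> 10 == 0x48d15, and the movb stores the low byte
// of `card` (equal to kCardDirty thanks to the biased card-table base) at
// card_table_base + 0x48d15.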
5974 
5975 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5976   LOG(FATAL) << "Unimplemented";
5977 }
5978 
5979 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5980   if (instruction->GetNext()->IsSuspendCheck() &&
5981       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5982     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5983     // The back edge will generate the suspend check.
5984     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5985   }
5986 
5987   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5988 }
5989 
5990 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5991   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5992       instruction, LocationSummary::kCallOnSlowPath);
5993   // In the suspend check slow path, there are usually no caller-save registers at all.
5994   // If SIMD instructions are present, however, we force spilling all live SIMD
5995   // registers at full width (since the runtime only saves/restores the lower part).
5996   locations->SetCustomSlowPathCallerSaves(
5997       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5998 }
5999 
6000 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6001   HBasicBlock* block = instruction->GetBlock();
6002   if (block->GetLoopInformation() != nullptr) {
6003     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6004     // The back edge will generate the suspend check.
6005     return;
6006   }
6007   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6008     // The goto will generate the suspend check.
6009     return;
6010   }
6011   GenerateSuspendCheck(instruction, nullptr);
6012 }
6013 
6014 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6015                                                           HBasicBlock* successor) {
6016   SuspendCheckSlowPathX86_64* slow_path =
6017       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6018   if (slow_path == nullptr) {
6019     slow_path =
6020         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6021     instruction->SetSlowPath(slow_path);
6022     codegen_->AddSlowPath(slow_path);
6023     if (successor != nullptr) {
6024       DCHECK(successor->IsLoopHeader());
6025     }
6026   } else {
6027     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6028   }
6029 
6030   __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6031                                    /* no_rip= */ true),
6032                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6033   if (successor == nullptr) {
6034     __ j(kNotZero, slow_path->GetEntryLabel());
6035     __ Bind(slow_path->GetReturnLabel());
6036   } else {
6037     __ j(kZero, codegen_->GetLabelOf(successor));
6038     __ jmp(slow_path->GetEntryLabel());
6039   }
6040 }
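// Two emission shapes: with no successor (a standalone HSuspendCheck), the common case falls
// through past the slow path's return label; at a back edge (successor != nullptr), the common
// case jumps straight to the loop header and the slow path branches there itself once the
// runtime call returns (see SuspendCheckSlowPathX86_64).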
6041 
6042 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6043   return codegen_->GetAssembler();
6044 }
6045 
6046 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6047   MoveOperands* move = moves_[index];
6048   Location source = move->GetSource();
6049   Location destination = move->GetDestination();
6050 
6051   if (source.IsRegister()) {
6052     if (destination.IsRegister()) {
6053       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6054     } else if (destination.IsStackSlot()) {
6055       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6056               source.AsRegister<CpuRegister>());
6057     } else {
6058       DCHECK(destination.IsDoubleStackSlot());
6059       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6060               source.AsRegister<CpuRegister>());
6061     }
6062   } else if (source.IsStackSlot()) {
6063     if (destination.IsRegister()) {
6064       __ movl(destination.AsRegister<CpuRegister>(),
6065               Address(CpuRegister(RSP), source.GetStackIndex()));
6066     } else if (destination.IsFpuRegister()) {
6067       __ movss(destination.AsFpuRegister<XmmRegister>(),
6068               Address(CpuRegister(RSP), source.GetStackIndex()));
6069     } else {
6070       DCHECK(destination.IsStackSlot());
6071       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6072       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6073     }
6074   } else if (source.IsDoubleStackSlot()) {
6075     if (destination.IsRegister()) {
6076       __ movq(destination.AsRegister<CpuRegister>(),
6077               Address(CpuRegister(RSP), source.GetStackIndex()));
6078     } else if (destination.IsFpuRegister()) {
6079       __ movsd(destination.AsFpuRegister<XmmRegister>(),
6080                Address(CpuRegister(RSP), source.GetStackIndex()));
6081     } else {
6082       DCHECK(destination.IsDoubleStackSlot()) << destination;
6083       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6084       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6085     }
6086   } else if (source.IsSIMDStackSlot()) {
6087     if (destination.IsFpuRegister()) {
6088       __ movups(destination.AsFpuRegister<XmmRegister>(),
6089                 Address(CpuRegister(RSP), source.GetStackIndex()));
6090     } else {
6091       DCHECK(destination.IsSIMDStackSlot());
6092       size_t high = kX86_64WordSize;
6093       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6094       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6095       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6096       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6097     }
6098   } else if (source.IsConstant()) {
6099     HConstant* constant = source.GetConstant();
6100     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6101       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6102       if (destination.IsRegister()) {
6103         if (value == 0) {
6104           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6105         } else {
6106           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6107         }
6108       } else {
6109         DCHECK(destination.IsStackSlot()) << destination;
6110         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6111       }
6112     } else if (constant->IsLongConstant()) {
6113       int64_t value = constant->AsLongConstant()->GetValue();
6114       if (destination.IsRegister()) {
6115         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6116       } else {
6117         DCHECK(destination.IsDoubleStackSlot()) << destination;
6118         codegen_->Store64BitValueToStack(destination, value);
6119       }
6120     } else if (constant->IsFloatConstant()) {
6121       float fp_value = constant->AsFloatConstant()->GetValue();
6122       if (destination.IsFpuRegister()) {
6123         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6124         codegen_->Load32BitValue(dest, fp_value);
6125       } else {
6126         DCHECK(destination.IsStackSlot()) << destination;
6127         Immediate imm(bit_cast<int32_t, float>(fp_value));
6128         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6129       }
6130     } else {
6131       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6132       double fp_value = constant->AsDoubleConstant()->GetValue();
6133       int64_t value = bit_cast<int64_t, double>(fp_value);
6134       if (destination.IsFpuRegister()) {
6135         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6136         codegen_->Load64BitValue(dest, fp_value);
6137       } else {
6138         DCHECK(destination.IsDoubleStackSlot()) << destination;
6139         codegen_->Store64BitValueToStack(destination, value);
6140       }
6141     }
6142   } else if (source.IsFpuRegister()) {
6143     if (destination.IsFpuRegister()) {
6144       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6145     } else if (destination.IsStackSlot()) {
6146       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6147                source.AsFpuRegister<XmmRegister>());
6148     } else if (destination.IsDoubleStackSlot()) {
6149       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6150                source.AsFpuRegister<XmmRegister>());
6151     } else {
6152       DCHECK(destination.IsSIMDStackSlot());
6153       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6154                 source.AsFpuRegister<XmmRegister>());
6155     }
6156   }
6157 }
6158 
6159 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6160   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6161   __ movl(Address(CpuRegister(RSP), mem), reg);
6162   __ movl(reg, CpuRegister(TMP));
6163 }
6164 
6165 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6166   __ movq(CpuRegister(TMP), reg1);
6167   __ movq(reg1, reg2);
6168   __ movq(reg2, CpuRegister(TMP));
6169 }
6170 
6171 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6172   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6173   __ movq(Address(CpuRegister(RSP), mem), reg);
6174   __ movq(reg, CpuRegister(TMP));
6175 }
6176 
6177 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6178   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6179   __ movss(Address(CpuRegister(RSP), mem), reg);
6180   __ movd(reg, CpuRegister(TMP));
6181 }
6182 
6183 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6184   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6185   __ movsd(Address(CpuRegister(RSP), mem), reg);
6186   __ movd(reg, CpuRegister(TMP));
6187 }
6188 
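// Swaps a 128-bit XMM register with a 16-byte stack slot: reserve a two-quadword scratch area
// below RSP, spill the XMM register there, swap that area with the original memory operand via
// ExchangeMemory64 (the operand's offset is rebased by `extra_slot` because RSP just moved),
// then reload the XMM register and release the scratch area.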
6189 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6190   size_t extra_slot = 2 * kX86_64WordSize;
6191   __ subq(CpuRegister(RSP), Immediate(extra_slot));
6192   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6193   ExchangeMemory64(0, mem + extra_slot, 2);
6194   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6195   __ addq(CpuRegister(RSP), Immediate(extra_slot));
6196 }
6197 
6198 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6199   ScratchRegisterScope ensure_scratch(
6200       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6201 
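  // If the scratch register had to be spilled, SpillScratch() pushed it on the stack, so all
  // RSP-relative offsets below must be biased by one word.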
6202   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6203   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6204   __ movl(CpuRegister(ensure_scratch.GetRegister()),
6205           Address(CpuRegister(RSP), mem2 + stack_offset));
6206   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6207   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6208           CpuRegister(ensure_scratch.GetRegister()));
6209 }
6210 
6211 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6212   ScratchRegisterScope ensure_scratch(
6213       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6214 
6215   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6216 
6217   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6218   for (int i = 0; i < num_of_qwords; i++) {
6219     __ movq(CpuRegister(TMP),
6220             Address(CpuRegister(RSP), mem1 + stack_offset));
6221     __ movq(CpuRegister(ensure_scratch.GetRegister()),
6222             Address(CpuRegister(RSP), mem2 + stack_offset));
6223     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6224             CpuRegister(TMP));
6225     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6226             CpuRegister(ensure_scratch.GetRegister()));
6227     stack_offset += kX86_64WordSize;
6228   }
6229 }
6230 
6231 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6232   MoveOperands* move = moves_[index];
6233   Location source = move->GetSource();
6234   Location destination = move->GetDestination();
6235 
6236   if (source.IsRegister() && destination.IsRegister()) {
6237     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6238   } else if (source.IsRegister() && destination.IsStackSlot()) {
6239     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6240   } else if (source.IsStackSlot() && destination.IsRegister()) {
6241     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6242   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6243     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6244   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6245     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6246   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6247     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6248   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6249     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6250   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6251     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6252     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6253     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6254   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6255     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6256   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6257     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6258   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6259     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6260   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6261     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6262   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6263     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6264   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6265     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6266   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6267     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6268   } else {
6269     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6270   }
6271 }
6272 
6273 
6274 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6275   __ pushq(CpuRegister(reg));
6276 }
6277 
6278 
6279 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6280   __ popq(CpuRegister(reg));
6281 }
6282 
6283 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6284     SlowPathCode* slow_path, CpuRegister class_reg) {
6285   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6286   const size_t status_byte_offset =
6287       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6288   constexpr uint32_t shifted_visibly_initialized_value =
6289       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6290 
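  // The class status lives in the bits of status_ above the SubtypeCheckBits; on little-endian
  // x86-64 those bits fall in the last byte of the field, so a single cmpb against the shifted
  // kVisiblyInitialized value suffices. kBelow is taken for every status that has not yet
  // reached visibly-initialized (this assumes kVisiblyInitialized is the highest ClassStatus).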
6291   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6292   __ j(kBelow, slow_path->GetEntryLabel());
6293   __ Bind(slow_path->GetExitLabel());
6294 }
6295 
6296 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6297                                                                        CpuRegister temp) {
6298   uint32_t path_to_root = check->GetBitstringPathToRoot();
6299   uint32_t mask = check->GetBitstringMask();
6300   DCHECK(IsPowerOfTwo(mask + 1));
6301   size_t mask_bits = WhichPowerOf2(mask + 1);
6302 
6303   if (mask_bits == 16u) {
6304     // Compare the bitstring in memory.
6305     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6306   } else {
6307     // /* uint32_t */ temp = temp->status_
6308     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6309     // Compare the bitstring bits using SUB.
6310     __ subl(temp, Immediate(path_to_root));
6311     // Shift out bits that do not contribute to the comparison.
6312     __ shll(temp, Immediate(32u - mask_bits));
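    // After the subl/shll pair, the zero flag is set exactly when the low mask_bits bits of the
    // status word equal path_to_root, which is the same condition the 16-bit cmpw above tests
    // directly; callers then branch on kEqual/kNotEqual.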
6313   }
6314 }
6315 
6316 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6317     HLoadClass::LoadKind desired_class_load_kind) {
6318   switch (desired_class_load_kind) {
6319     case HLoadClass::LoadKind::kInvalid:
6320       LOG(FATAL) << "UNREACHABLE";
6321       UNREACHABLE();
6322     case HLoadClass::LoadKind::kReferrersClass:
6323       break;
6324     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6325     case HLoadClass::LoadKind::kBootImageRelRo:
6326     case HLoadClass::LoadKind::kBssEntry:
6327     case HLoadClass::LoadKind::kBssEntryPublic:
6328     case HLoadClass::LoadKind::kBssEntryPackage:
6329       DCHECK(!GetCompilerOptions().IsJitCompiler());
6330       break;
6331     case HLoadClass::LoadKind::kJitBootImageAddress:
6332     case HLoadClass::LoadKind::kJitTableAddress:
6333       DCHECK(GetCompilerOptions().IsJitCompiler());
6334       break;
6335     case HLoadClass::LoadKind::kRuntimeCall:
6336       break;
6337   }
6338   return desired_class_load_kind;
6339 }
6340 
6341 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6342   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6343   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6344     // Custom calling convention: RAX serves as both input and output.
6345     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6346         cls,
6347         Location::RegisterLocation(RAX),
6348         Location::RegisterLocation(RAX));
6349     return;
6350   }
6351   DCHECK_EQ(cls->NeedsAccessCheck(),
6352             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6353                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6354 
6355   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6356   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6357       ? LocationSummary::kCallOnSlowPath
6358       : LocationSummary::kNoCall;
6359   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6360   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6361     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6362   }
6363 
6364   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6365     locations->SetInAt(0, Location::RequiresRegister());
6366   }
6367   locations->SetOut(Location::RequiresRegister());
6368   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6369     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6370       // Rely on the type resolution and/or initialization to save everything.
6371       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6372     } else {
6373       // For non-Baker read barrier we have a temp-clobbering call.
6374     }
6375   }
6376 }
6377 
6378 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6379                                                  dex::TypeIndex type_index,
6380                                                  Handle<mirror::Class> handle) {
6381   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6382   // Add a patch entry and return the label.
6383   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6384   PatchInfo<Label>* info = &jit_class_patches_.back();
6385   return &info->label;
6386 }
6387 
6388 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6389 // move.
6390 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6391   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6392   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6393     codegen_->GenerateLoadClassRuntimeCall(cls);
6394     return;
6395   }
6396   DCHECK_EQ(cls->NeedsAccessCheck(),
6397             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6398                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6399 
6400   LocationSummary* locations = cls->GetLocations();
6401   Location out_loc = locations->Out();
6402   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6403 
6404   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6405       ? kWithoutReadBarrier
6406       : kCompilerReadBarrierOption;
6407   bool generate_null_check = false;
6408   switch (load_kind) {
6409     case HLoadClass::LoadKind::kReferrersClass: {
6410       DCHECK(!cls->CanCallRuntime());
6411       DCHECK(!cls->MustGenerateClinitCheck());
6412       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6413       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6414       GenerateGcRootFieldLoad(
6415           cls,
6416           out_loc,
6417           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6418           /* fixup_label= */ nullptr,
6419           read_barrier_option);
6420       break;
6421     }
6422     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6423       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6424              codegen_->GetCompilerOptions().IsBootImageExtension());
6425       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6426       __ leal(out,
6427               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6428       codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6429       break;
6430     case HLoadClass::LoadKind::kBootImageRelRo: {
6431       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6432       __ movl(out,
6433               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6434       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6435       break;
6436     }
6437     case HLoadClass::LoadKind::kBssEntry:
6438     case HLoadClass::LoadKind::kBssEntryPublic:
6439     case HLoadClass::LoadKind::kBssEntryPackage: {
6440       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6441                                           /* no_rip= */ false);
6442       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6443       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6444       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6445       // No need for memory fence, thanks to the x86-64 memory model.
6446       generate_null_check = true;
6447       break;
6448     }
6449     case HLoadClass::LoadKind::kJitBootImageAddress: {
6450       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6451       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6452       DCHECK_NE(address, 0u);
6453       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6454       break;
6455     }
6456     case HLoadClass::LoadKind::kJitTableAddress: {
6457       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6458                                           /* no_rip= */ true);
6459       Label* fixup_label =
6460           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6461       // /* GcRoot<mirror::Class> */ out = *address
6462       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6463       break;
6464     }
6465     default:
6466       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6467       UNREACHABLE();
6468   }
6469 
6470   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6471     DCHECK(cls->CanCallRuntime());
6472     SlowPathCode* slow_path =
6473         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6474     codegen_->AddSlowPath(slow_path);
6475     if (generate_null_check) {
6476       __ testl(out, out);
6477       __ j(kEqual, slow_path->GetEntryLabel());
6478     }
6479     if (cls->MustGenerateClinitCheck()) {
6480       GenerateClassInitializationCheck(slow_path, out);
6481     } else {
6482       __ Bind(slow_path->GetExitLabel());
6483     }
6484   }
6485 }
6486 
6487 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6488   LocationSummary* locations =
6489       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6490   locations->SetInAt(0, Location::RequiresRegister());
6491   if (check->HasUses()) {
6492     locations->SetOut(Location::SameAsFirstInput());
6493   }
6494   // Rely on the type initialization to save everything we need.
6495   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6496 }
6497 
6498 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6499   // Custom calling convention: RAX serves as both input and output.
6500   Location location = Location::RegisterLocation(RAX);
6501   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6502 }
6503 
6504 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6505   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6506 }
6507 
6508 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6509   // Custom calling convention: RAX serves as both input and output.
6510   Location location = Location::RegisterLocation(RAX);
6511   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6512 }
6513 
6514 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6515   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6516 }
6517 
6518 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6519   // We assume the class is not null.
6520   SlowPathCode* slow_path =
6521       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6522   codegen_->AddSlowPath(slow_path);
6523   GenerateClassInitializationCheck(slow_path,
6524                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6525 }
6526 
6527 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6528     HLoadString::LoadKind desired_string_load_kind) {
6529   switch (desired_string_load_kind) {
6530     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6531     case HLoadString::LoadKind::kBootImageRelRo:
6532     case HLoadString::LoadKind::kBssEntry:
6533       DCHECK(!GetCompilerOptions().IsJitCompiler());
6534       break;
6535     case HLoadString::LoadKind::kJitBootImageAddress:
6536     case HLoadString::LoadKind::kJitTableAddress:
6537       DCHECK(GetCompilerOptions().IsJitCompiler());
6538       break;
6539     case HLoadString::LoadKind::kRuntimeCall:
6540       break;
6541   }
6542   return desired_string_load_kind;
6543 }
6544 
6545 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6546   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6547   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6548   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6549     locations->SetOut(Location::RegisterLocation(RAX));
6550   } else {
6551     locations->SetOut(Location::RequiresRegister());
6552     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6553       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6554         // Rely on the pResolveString to save everything.
6555         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6556       } else {
6557         // For non-Baker read barrier we have a temp-clobbering call.
6558       }
6559     }
6560   }
6561 }
6562 
6563 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6564                                                   dex::StringIndex string_index,
6565                                                   Handle<mirror::String> handle) {
6566   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6567   // Add a patch entry and return the label.
6568   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6569   PatchInfo<Label>* info = &jit_string_patches_.back();
6570   return &info->label;
6571 }
6572 
6573 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6574 // move.
6575 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6576   LocationSummary* locations = load->GetLocations();
6577   Location out_loc = locations->Out();
6578   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6579 
6580   switch (load->GetLoadKind()) {
6581     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6582       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6583              codegen_->GetCompilerOptions().IsBootImageExtension());
6584       __ leal(out,
6585               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6586       codegen_->RecordBootImageStringPatch(load);
6587       return;
6588     }
6589     case HLoadString::LoadKind::kBootImageRelRo: {
6590       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6591       __ movl(out,
6592               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6593       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6594       return;
6595     }
6596     case HLoadString::LoadKind::kBssEntry: {
6597       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6598                                           /* no_rip= */ false);
6599       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6600       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
6601       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6602       // No need for memory fence, thanks to the x86-64 memory model.
6603       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6604       codegen_->AddSlowPath(slow_path);
6605       __ testl(out, out);
6606       __ j(kEqual, slow_path->GetEntryLabel());
6607       __ Bind(slow_path->GetExitLabel());
6608       return;
6609     }
6610     case HLoadString::LoadKind::kJitBootImageAddress: {
6611       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6612       DCHECK_NE(address, 0u);
6613       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6614       return;
6615     }
6616     case HLoadString::LoadKind::kJitTableAddress: {
6617       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6618                                           /* no_rip= */ true);
6619       Label* fixup_label = codegen_->NewJitRootStringPatch(
6620           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6621       // /* GcRoot<mirror::String> */ out = *address
6622       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6623       return;
6624     }
6625     default:
6626       break;
6627   }
6628 
6629   // TODO: Re-add the compiler code to do the string dex cache lookup.
6630   // Custom calling convention: RAX serves as both input and output.
6631   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6632   codegen_->InvokeRuntime(kQuickResolveString,
6633                           load,
6634                           load->GetDexPc());
6635   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6636 }
6637 
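// Returns the absolute (non-RIP-relative) offset of the thread-local exception field; callers
// apply the %gs segment override (gs()) so the displacement resolves against the current Thread,
// which x86-64 keeps in the gs base.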
6638 static Address GetExceptionTlsAddress() {
6639   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6640                            /* no_rip= */ true);
6641 }
6642 
6643 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6644   LocationSummary* locations =
6645       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6646   locations->SetOut(Location::RequiresRegister());
6647 }
6648 
6649 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6650   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6651 }
6652 
6653 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6654   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6655 }
6656 
6657 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6658   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6659 }
6660 
6661 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6662   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6663       instruction, LocationSummary::kCallOnMainOnly);
6664   InvokeRuntimeCallingConvention calling_convention;
6665   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6666 }
6667 
6668 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6669   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6670   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6671 }
6672 
6673 // Temp is used for read barrier.
6674 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6675   if (kEmitCompilerReadBarrier &&
6676       !kUseBakerReadBarrier &&
6677       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6678        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6679        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6680     return 1;
6681   }
6682   return 0;
6683 }
6684 
6685 // The interface case has 2 temps: one holds the number of interfaces and one holds the
6686 // current interface pointer; the current interface is compared in memory.
6687 // The other checks have one temp for loading the object's class.
6688 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6689   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6690     return 2;
6691   }
6692   return 1 + NumberOfInstanceOfTemps(type_check_kind);
6693 }
6694 
6695 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6696   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6697   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6698   bool baker_read_barrier_slow_path = false;
6699   switch (type_check_kind) {
6700     case TypeCheckKind::kExactCheck:
6701     case TypeCheckKind::kAbstractClassCheck:
6702     case TypeCheckKind::kClassHierarchyCheck:
6703     case TypeCheckKind::kArrayObjectCheck: {
6704       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6705       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6706       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6707       break;
6708     }
6709     case TypeCheckKind::kArrayCheck:
6710     case TypeCheckKind::kUnresolvedCheck:
6711     case TypeCheckKind::kInterfaceCheck:
6712       call_kind = LocationSummary::kCallOnSlowPath;
6713       break;
6714     case TypeCheckKind::kBitstringCheck:
6715       break;
6716   }
6717 
6718   LocationSummary* locations =
6719       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6720   if (baker_read_barrier_slow_path) {
6721     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6722   }
6723   locations->SetInAt(0, Location::RequiresRegister());
6724   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6725     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6726     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6727     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6728   } else {
6729     locations->SetInAt(1, Location::Any());
6730   }
6731   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6732   locations->SetOut(Location::RequiresRegister());
6733   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6734 }
6735 
6736 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6737   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6738   LocationSummary* locations = instruction->GetLocations();
6739   Location obj_loc = locations->InAt(0);
6740   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6741   Location cls = locations->InAt(1);
6742   Location out_loc = locations->Out();
6743   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6744   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6745   DCHECK_LE(num_temps, 1u);
6746   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6747   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6748   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6749   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6750   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6751   SlowPathCode* slow_path = nullptr;
6752   NearLabel done, zero;
6753 
6754   // Return 0 if `obj` is null.
6755   // Avoid null check if we know obj is not null.
6756   if (instruction->MustDoNullCheck()) {
6757     __ testl(obj, obj);
6758     __ j(kEqual, &zero);
6759   }
6760 
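  // The `zero` and `done` labels are bound at the end only if some path above actually jumped
  // to them (IsLinked()); paths that materialize the result with setcc never link them and so
  // avoid emitting dead labels and jumps.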
6761   switch (type_check_kind) {
6762     case TypeCheckKind::kExactCheck: {
6763       ReadBarrierOption read_barrier_option =
6764           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6765       // /* HeapReference<Class> */ out = obj->klass_
6766       GenerateReferenceLoadTwoRegisters(instruction,
6767                                         out_loc,
6768                                         obj_loc,
6769                                         class_offset,
6770                                         read_barrier_option);
6771       if (cls.IsRegister()) {
6772         __ cmpl(out, cls.AsRegister<CpuRegister>());
6773       } else {
6774         DCHECK(cls.IsStackSlot()) << cls;
6775         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6776       }
6777       if (zero.IsLinked()) {
6778         // Classes must be equal for the instanceof to succeed.
6779         __ j(kNotEqual, &zero);
6780         __ movl(out, Immediate(1));
6781         __ jmp(&done);
6782       } else {
6783         __ setcc(kEqual, out);
6784         // setcc only sets the low byte.
6785         __ andl(out, Immediate(1));
6786       }
6787       break;
6788     }
6789 
6790     case TypeCheckKind::kAbstractClassCheck: {
6791       ReadBarrierOption read_barrier_option =
6792           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6793       // /* HeapReference<Class> */ out = obj->klass_
6794       GenerateReferenceLoadTwoRegisters(instruction,
6795                                         out_loc,
6796                                         obj_loc,
6797                                         class_offset,
6798                                         read_barrier_option);
6799       // If the class is abstract, we eagerly fetch the super class of the
6800       // object to avoid doing a comparison we know will fail.
6801       NearLabel loop, success;
6802       __ Bind(&loop);
6803       // /* HeapReference<Class> */ out = out->super_class_
6804       GenerateReferenceLoadOneRegister(instruction,
6805                                        out_loc,
6806                                        super_offset,
6807                                        maybe_temp_loc,
6808                                        read_barrier_option);
6809       __ testl(out, out);
6810       // If `out` is null, we use it for the result, and jump to `done`.
6811       __ j(kEqual, &done);
6812       if (cls.IsRegister()) {
6813         __ cmpl(out, cls.AsRegister<CpuRegister>());
6814       } else {
6815         DCHECK(cls.IsStackSlot()) << cls;
6816         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6817       }
6818       __ j(kNotEqual, &loop);
6819       __ movl(out, Immediate(1));
6820       if (zero.IsLinked()) {
6821         __ jmp(&done);
6822       }
6823       break;
6824     }
6825 
6826     case TypeCheckKind::kClassHierarchyCheck: {
6827       ReadBarrierOption read_barrier_option =
6828           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6829       // /* HeapReference<Class> */ out = obj->klass_
6830       GenerateReferenceLoadTwoRegisters(instruction,
6831                                         out_loc,
6832                                         obj_loc,
6833                                         class_offset,
6834                                         read_barrier_option);
6835       // Walk over the class hierarchy to find a match.
6836       NearLabel loop, success;
6837       __ Bind(&loop);
6838       if (cls.IsRegister()) {
6839         __ cmpl(out, cls.AsRegister<CpuRegister>());
6840       } else {
6841         DCHECK(cls.IsStackSlot()) << cls;
6842         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6843       }
6844       __ j(kEqual, &success);
6845       // /* HeapReference<Class> */ out = out->super_class_
6846       GenerateReferenceLoadOneRegister(instruction,
6847                                        out_loc,
6848                                        super_offset,
6849                                        maybe_temp_loc,
6850                                        read_barrier_option);
6851       __ testl(out, out);
6852       __ j(kNotEqual, &loop);
6853       // If `out` is null, we use it for the result, and jump to `done`.
6854       __ jmp(&done);
6855       __ Bind(&success);
6856       __ movl(out, Immediate(1));
6857       if (zero.IsLinked()) {
6858         __ jmp(&done);
6859       }
6860       break;
6861     }
6862 
6863     case TypeCheckKind::kArrayObjectCheck: {
6864       ReadBarrierOption read_barrier_option =
6865           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6866       // /* HeapReference<Class> */ out = obj->klass_
6867       GenerateReferenceLoadTwoRegisters(instruction,
6868                                         out_loc,
6869                                         obj_loc,
6870                                         class_offset,
6871                                         read_barrier_option);
6872       // Do an exact check.
6873       NearLabel exact_check;
6874       if (cls.IsRegister()) {
6875         __ cmpl(out, cls.AsRegister<CpuRegister>());
6876       } else {
6877         DCHECK(cls.IsStackSlot()) << cls;
6878         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6879       }
6880       __ j(kEqual, &exact_check);
6881       // Otherwise, we need to check that the object's class is a non-primitive array.
6882       // /* HeapReference<Class> */ out = out->component_type_
6883       GenerateReferenceLoadOneRegister(instruction,
6884                                        out_loc,
6885                                        component_offset,
6886                                        maybe_temp_loc,
6887                                        read_barrier_option);
6888       __ testl(out, out);
6889       // If `out` is null, we use it for the result, and jump to `done`.
6890       __ j(kEqual, &done);
6891       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6892       __ j(kNotEqual, &zero);
6893       __ Bind(&exact_check);
6894       __ movl(out, Immediate(1));
6895       __ jmp(&done);
6896       break;
6897     }
6898 
6899     case TypeCheckKind::kArrayCheck: {
6900       // No read barrier since the slow path will retry upon failure.
6901       // /* HeapReference<Class> */ out = obj->klass_
6902       GenerateReferenceLoadTwoRegisters(instruction,
6903                                         out_loc,
6904                                         obj_loc,
6905                                         class_offset,
6906                                         kWithoutReadBarrier);
6907       if (cls.IsRegister()) {
6908         __ cmpl(out, cls.AsRegister<CpuRegister>());
6909       } else {
6910         DCHECK(cls.IsStackSlot()) << cls;
6911         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6912       }
6913       DCHECK(locations->OnlyCallsOnSlowPath());
6914       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6915           instruction, /* is_fatal= */ false);
6916       codegen_->AddSlowPath(slow_path);
6917       __ j(kNotEqual, slow_path->GetEntryLabel());
6918       __ movl(out, Immediate(1));
6919       if (zero.IsLinked()) {
6920         __ jmp(&done);
6921       }
6922       break;
6923     }
6924 
6925     case TypeCheckKind::kUnresolvedCheck:
6926     case TypeCheckKind::kInterfaceCheck: {
6927       // Note that we indeed only call on slow path, but we always go
6928       // into the slow path for the unresolved and interface check
6929       // cases.
6930       //
6931       // We cannot directly call the InstanceofNonTrivial runtime
6932       // entry point without resorting to a type checking slow path
6933       // here (i.e. by calling InvokeRuntime directly), as it would
6934       // require assigning fixed registers for the inputs of this
6935       // HInstanceOf instruction (following the runtime calling
6936       // convention), which might be cluttered by the potential first
6937       // read barrier emission at the beginning of this method.
6938       //
6939       // TODO: Introduce a new runtime entry point taking the object
6940       // to test (instead of its class) as argument, and let it deal
6941       // with the read barrier issues. This will let us refactor this
6942       // case of the `switch` code as it was previously (with a direct
6943       // call to the runtime not using a type checking slow path).
6944       // This should also be beneficial for the other cases above.
6945       DCHECK(locations->OnlyCallsOnSlowPath());
6946       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6947           instruction, /* is_fatal= */ false);
6948       codegen_->AddSlowPath(slow_path);
6949       __ jmp(slow_path->GetEntryLabel());
6950       if (zero.IsLinked()) {
6951         __ jmp(&done);
6952       }
6953       break;
6954     }
6955 
6956     case TypeCheckKind::kBitstringCheck: {
6957       // /* HeapReference<Class> */ temp = obj->klass_
6958       GenerateReferenceLoadTwoRegisters(instruction,
6959                                         out_loc,
6960                                         obj_loc,
6961                                         class_offset,
6962                                         kWithoutReadBarrier);
6963 
6964       GenerateBitstringTypeCheckCompare(instruction, out);
6965       if (zero.IsLinked()) {
6966         __ j(kNotEqual, &zero);
6967         __ movl(out, Immediate(1));
6968         __ jmp(&done);
6969       } else {
6970         __ setcc(kEqual, out);
6971         // setcc only sets the low byte.
6972         __ andl(out, Immediate(1));
6973       }
6974       break;
6975     }
6976   }
6977 
6978   if (zero.IsLinked()) {
6979     __ Bind(&zero);
6980     __ xorl(out, out);
6981   }
6982 
6983   if (done.IsLinked()) {
6984     __ Bind(&done);
6985   }
6986 
6987   if (slow_path != nullptr) {
6988     __ Bind(slow_path->GetExitLabel());
6989   }
6990 }
6991 
6992 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6993   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6994   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6995   LocationSummary* locations =
6996       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6997   locations->SetInAt(0, Location::RequiresRegister());
6998   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6999     // Require a register for the interface check since there is a loop that compares the class to
7000     // a memory address.
7001     locations->SetInAt(1, Location::RequiresRegister());
7002   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7003     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7004     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7005     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7006   } else {
7007     locations->SetInAt(1, Location::Any());
7008   }
7009   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
7010   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7011 }
7012 
7013 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7014   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7015   LocationSummary* locations = instruction->GetLocations();
7016   Location obj_loc = locations->InAt(0);
7017   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7018   Location cls = locations->InAt(1);
7019   Location temp_loc = locations->GetTemp(0);
7020   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7021   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7022   DCHECK_GE(num_temps, 1u);
7023   DCHECK_LE(num_temps, 2u);
7024   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7025   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7026   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7027   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7028   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7029   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7030   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7031   const uint32_t object_array_data_offset =
7032       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7033 
7034   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7035   SlowPathCode* type_check_slow_path =
7036       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7037           instruction, is_type_check_slow_path_fatal);
7038   codegen_->AddSlowPath(type_check_slow_path);
7039 
7040 
7041   NearLabel done;
7042   // Avoid null check if we know obj is not null.
7043   if (instruction->MustDoNullCheck()) {
7044     __ testl(obj, obj);
7045     __ j(kEqual, &done);
7046   }
7047 
7048   switch (type_check_kind) {
7049     case TypeCheckKind::kExactCheck:
7050     case TypeCheckKind::kArrayCheck: {
7051       // /* HeapReference<Class> */ temp = obj->klass_
7052       GenerateReferenceLoadTwoRegisters(instruction,
7053                                         temp_loc,
7054                                         obj_loc,
7055                                         class_offset,
7056                                         kWithoutReadBarrier);
7057       if (cls.IsRegister()) {
7058         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7059       } else {
7060         DCHECK(cls.IsStackSlot()) << cls;
7061         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7062       }
7063       // Jump to slow path for throwing the exception or doing a
7064       // more involved array check.
7065       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7066       break;
7067     }
7068 
7069     case TypeCheckKind::kAbstractClassCheck: {
7070       // /* HeapReference<Class> */ temp = obj->klass_
7071       GenerateReferenceLoadTwoRegisters(instruction,
7072                                         temp_loc,
7073                                         obj_loc,
7074                                         class_offset,
7075                                         kWithoutReadBarrier);
7076       // If the class is abstract, we eagerly fetch the super class of the
7077       // object to avoid doing a comparison we know will fail.
7078       NearLabel loop;
7079       __ Bind(&loop);
7080       // /* HeapReference<Class> */ temp = temp->super_class_
7081       GenerateReferenceLoadOneRegister(instruction,
7082                                        temp_loc,
7083                                        super_offset,
7084                                        maybe_temp2_loc,
7085                                        kWithoutReadBarrier);
7086 
7087       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7088       // exception.
7089       __ testl(temp, temp);
7090       // Otherwise, compare the classes.
7091       __ j(kZero, type_check_slow_path->GetEntryLabel());
7092       if (cls.IsRegister()) {
7093         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7094       } else {
7095         DCHECK(cls.IsStackSlot()) << cls;
7096         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7097       }
7098       __ j(kNotEqual, &loop);
7099       break;
7100     }
7101 
7102     case TypeCheckKind::kClassHierarchyCheck: {
7103       // /* HeapReference<Class> */ temp = obj->klass_
7104       GenerateReferenceLoadTwoRegisters(instruction,
7105                                         temp_loc,
7106                                         obj_loc,
7107                                         class_offset,
7108                                         kWithoutReadBarrier);
7109       // Walk over the class hierarchy to find a match.
7110       NearLabel loop;
7111       __ Bind(&loop);
7112       if (cls.IsRegister()) {
7113         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7114       } else {
7115         DCHECK(cls.IsStackSlot()) << cls;
7116         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7117       }
7118       __ j(kEqual, &done);
7119 
7120       // /* HeapReference<Class> */ temp = temp->super_class_
7121       GenerateReferenceLoadOneRegister(instruction,
7122                                        temp_loc,
7123                                        super_offset,
7124                                        maybe_temp2_loc,
7125                                        kWithoutReadBarrier);
7126 
7127       // If the class reference currently in `temp` is not null, jump
7128       // back to the beginning of the loop.
7129       __ testl(temp, temp);
7130       __ j(kNotZero, &loop);
7131       // Otherwise, jump to the slow path to throw the exception.
7132       __ jmp(type_check_slow_path->GetEntryLabel());
7133       break;
7134     }
7135 
7136     case TypeCheckKind::kArrayObjectCheck: {
7137       // /* HeapReference<Class> */ temp = obj->klass_
7138       GenerateReferenceLoadTwoRegisters(instruction,
7139                                         temp_loc,
7140                                         obj_loc,
7141                                         class_offset,
7142                                         kWithoutReadBarrier);
7143       // Do an exact check.
7144       NearLabel check_non_primitive_component_type;
7145       if (cls.IsRegister()) {
7146         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7147       } else {
7148         DCHECK(cls.IsStackSlot()) << cls;
7149         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7150       }
7151       __ j(kEqual, &done);
7152 
7153       // Otherwise, we need to check that the object's class is a non-primitive array.
7154       // /* HeapReference<Class> */ temp = temp->component_type_
7155       GenerateReferenceLoadOneRegister(instruction,
7156                                        temp_loc,
7157                                        component_offset,
7158                                        maybe_temp2_loc,
7159                                        kWithoutReadBarrier);
7160 
7161       // If the component type is not null (i.e. the object is indeed
7162       // an array), jump to label `check_non_primitive_component_type`
7163       // to further check that this component type is not a primitive
7164       // type.
7165       __ testl(temp, temp);
7166       // Otherwise, jump to the slow path to throw the exception.
7167       __ j(kZero, type_check_slow_path->GetEntryLabel());
7168       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7169       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7170       break;
7171     }
7172 
7173     case TypeCheckKind::kUnresolvedCheck: {
7174       // We always go into the type check slow path for the unresolved case.
7175       //
7176       // We cannot directly call the CheckCast runtime entry point
7177       // without resorting to a type checking slow path here (i.e. by
7178       // calling InvokeRuntime directly), as it would require
7179       // assigning fixed registers for the inputs of this HCheckCast
7180       // instruction (following the runtime calling convention), which
7181       // might be cluttered by the potential first read barrier
7182       // emission at the beginning of this method.
7183       __ jmp(type_check_slow_path->GetEntryLabel());
7184       break;
7185     }
7186 
7187     case TypeCheckKind::kInterfaceCheck: {
7188       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7189       // We cannot get false positives by doing this.
7190       // /* HeapReference<Class> */ temp = obj->klass_
7191       GenerateReferenceLoadTwoRegisters(instruction,
7192                                         temp_loc,
7193                                         obj_loc,
7194                                         class_offset,
7195                                         kWithoutReadBarrier);
7196 
7197       // /* HeapReference<Class> */ temp = temp->iftable_
7198       GenerateReferenceLoadTwoRegisters(instruction,
7199                                         temp_loc,
7200                                         temp_loc,
7201                                         iftable_offset,
7202                                         kWithoutReadBarrier);
7203       // Iftable is never null.
7204       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
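      // Each IfTable entry holds two 32-bit references (the interface class and its method
      // array), hence the step of 2 in the counter and the TIMES_4 scaling into the array
      // payload below.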
7205       // Maybe poison the `cls` for direct comparison with memory.
7206       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7207       // Loop through the iftable and check if any class matches.
7208       NearLabel start_loop;
7209       __ Bind(&start_loop);
7210       // Need to subtract first to handle the empty array case.
7211       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7212       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7213       // Go to next interface if the classes do not match.
7214       __ cmpl(cls.AsRegister<CpuRegister>(),
7215               CodeGeneratorX86_64::ArrayAddress(temp,
7216                                                 maybe_temp2_loc,
7217                                                 TIMES_4,
7218                                                 object_array_data_offset));
7219       __ j(kNotEqual, &start_loop);  // Loop again if the classes do not match.
7220       // If `cls` was poisoned above, unpoison it.
7221       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7222       break;
7223     }
7224 
7225     case TypeCheckKind::kBitstringCheck: {
7226       // /* HeapReference<Class> */ temp = obj->klass_
7227       GenerateReferenceLoadTwoRegisters(instruction,
7228                                         temp_loc,
7229                                         obj_loc,
7230                                         class_offset,
7231                                         kWithoutReadBarrier);
7232 
7233       GenerateBitstringTypeCheckCompare(instruction, temp);
7234       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7235       break;
7236     }
7237   }
7238 
7239   if (done.IsLinked()) {
7240     __ Bind(&done);
7241   }
7242 
7243   __ Bind(type_check_slow_path->GetExitLabel());
7244 }
7245 
7246 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7247   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7248       instruction, LocationSummary::kCallOnMainOnly);
7249   InvokeRuntimeCallingConvention calling_convention;
7250   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7251 }
7252 
7253 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7254   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7255                           instruction,
7256                           instruction->GetDexPc());
7257   if (instruction->IsEnter()) {
7258     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7259   } else {
7260     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7261   }
7262 }
7263 
7264 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7265   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7266   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7267   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7268   locations->SetInAt(0, Location::RequiresRegister());
7269   // There is no immediate variant of ANDN (negated bitwise AND) on x86.
7270   locations->SetInAt(1, Location::RequiresRegister());
7271   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7272 }
7273 
7274 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7275   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7276   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7277   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7278   locations->SetInAt(0, Location::RequiresRegister());
7279   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7280 }
7281 
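// For reference (x86 BMI1 semantics, not ART-specific): `andn dst, src1, src2` computes
// dst = ~src1 & src2, so the emission below produces dest = ~first & second.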
7282 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7283   LocationSummary* locations = instruction->GetLocations();
7284   Location first = locations->InAt(0);
7285   Location second = locations->InAt(1);
7286   Location dest = locations->Out();
7287   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7288 }
7289 
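// For reference (x86 BMI1 semantics): `blsr dst, src` computes dst = src & (src - 1), i.e. it
// resets the lowest set bit, and `blsmsk dst, src` computes dst = src ^ (src - 1), i.e. a mask
// up to and including the lowest set bit; these correspond to the kAnd/kXor op kinds below.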
7290 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7291   LocationSummary* locations = instruction->GetLocations();
7292   Location src = locations->InAt(0);
7293   Location dest = locations->Out();
7294   switch (instruction->GetOpKind()) {
7295     case HInstruction::kAnd:
7296       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7297       break;
7298     case HInstruction::kXor:
7299       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7300       break;
7301     default:
7302       LOG(FATAL) << "Unreachable";
7303   }
7304 }
7305 
7306 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7307 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7308 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7309 
7310 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7311   LocationSummary* locations =
7312       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7313   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7314          || instruction->GetResultType() == DataType::Type::kInt64);
7315   locations->SetInAt(0, Location::RequiresRegister());
7316   locations->SetInAt(1, Location::Any());
7317   locations->SetOut(Location::SameAsFirstInput());
7318 }
7319 
7320 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7321   HandleBitwiseOperation(instruction);
7322 }
7323 
7324 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7325   HandleBitwiseOperation(instruction);
7326 }
7327 
7328 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7329   HandleBitwiseOperation(instruction);
7330 }
7331 
7332 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7333   LocationSummary* locations = instruction->GetLocations();
7334   Location first = locations->InAt(0);
7335   Location second = locations->InAt(1);
7336   DCHECK(first.Equals(locations->Out()));
7337 
7338   if (instruction->GetResultType() == DataType::Type::kInt32) {
7339     if (second.IsRegister()) {
7340       if (instruction->IsAnd()) {
7341         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7342       } else if (instruction->IsOr()) {
7343         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7344       } else {
7345         DCHECK(instruction->IsXor());
7346         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7347       }
7348     } else if (second.IsConstant()) {
7349       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7350       if (instruction->IsAnd()) {
7351         __ andl(first.AsRegister<CpuRegister>(), imm);
7352       } else if (instruction->IsOr()) {
7353         __ orl(first.AsRegister<CpuRegister>(), imm);
7354       } else {
7355         DCHECK(instruction->IsXor());
7356         __ xorl(first.AsRegister<CpuRegister>(), imm);
7357       }
7358     } else {
7359       Address address(CpuRegister(RSP), second.GetStackIndex());
7360       if (instruction->IsAnd()) {
7361         __ andl(first.AsRegister<CpuRegister>(), address);
7362       } else if (instruction->IsOr()) {
7363         __ orl(first.AsRegister<CpuRegister>(), address);
7364       } else {
7365         DCHECK(instruction->IsXor());
7366         __ xorl(first.AsRegister<CpuRegister>(), address);
7367       }
7368     }
7369   } else {
7370     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7371     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7372     bool second_is_constant = false;
7373     int64_t value = 0;
7374     if (second.IsConstant()) {
7375       second_is_constant = true;
7376       value = second.GetConstant()->AsLongConstant()->GetValue();
7377     }
7378     bool is_int32_value = IsInt<32>(value);
7379 
7380     if (instruction->IsAnd()) {
7381       if (second_is_constant) {
7382         if (is_int32_value) {
7383           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7384         } else {
7385           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7386         }
7387       } else if (second.IsDoubleStackSlot()) {
7388         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7389       } else {
7390         __ andq(first_reg, second.AsRegister<CpuRegister>());
7391       }
7392     } else if (instruction->IsOr()) {
7393       if (second_is_constant) {
7394         if (is_int32_value) {
7395           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7396         } else {
7397           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7398         }
7399       } else if (second.IsDoubleStackSlot()) {
7400         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7401       } else {
7402         __ orq(first_reg, second.AsRegister<CpuRegister>());
7403       }
7404     } else {
7405       DCHECK(instruction->IsXor());
7406       if (second_is_constant) {
7407         if (is_int32_value) {
7408           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7409         } else {
7410           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7411         }
7412       } else if (second.IsDoubleStackSlot()) {
7413         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7414       } else {
7415         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7416       }
7417     }
7418   }
7419 }
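
// Note on the 64-bit path above: constants that do not fit in a sign-extended int32 are
// materialized through a RIP-relative load from the constant area (LiteralInt64Address),
// since the x86-64 AND/OR/XOR instructions only accept 32-bit sign-extended immediates.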
7420 
7421 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7422     HInstruction* instruction,
7423     Location out,
7424     uint32_t offset,
7425     Location maybe_temp,
7426     ReadBarrierOption read_barrier_option) {
7427   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7428   if (read_barrier_option == kWithReadBarrier) {
7429     CHECK(kEmitCompilerReadBarrier);
7430     if (kUseBakerReadBarrier) {
7431       // Load with fast path based Baker's read barrier.
7432       // /* HeapReference<Object> */ out = *(out + offset)
7433       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7434           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7435     } else {
7436       // Load with slow path based read barrier.
7437       // Save the value of `out` into `maybe_temp` before overwriting it
7438       // in the following move operation, as we will need it for the
7439       // read barrier below.
7440       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7441       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7442       // /* HeapReference<Object> */ out = *(out + offset)
7443       __ movl(out_reg, Address(out_reg, offset));
7444       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7445     }
7446   } else {
7447     // Plain load with no read barrier.
7448     // /* HeapReference<Object> */ out = *(out + offset)
7449     __ movl(out_reg, Address(out_reg, offset));
7450     __ MaybeUnpoisonHeapReference(out_reg);
7451   }
7452 }
7453 
7454 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7455     HInstruction* instruction,
7456     Location out,
7457     Location obj,
7458     uint32_t offset,
7459     ReadBarrierOption read_barrier_option) {
7460   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7461   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7462   if (read_barrier_option == kWithReadBarrier) {
7463     CHECK(kEmitCompilerReadBarrier);
7464     if (kUseBakerReadBarrier) {
7465       // Load with fast path based Baker's read barrier.
7466       // /* HeapReference<Object> */ out = *(obj + offset)
7467       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7468           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7469     } else {
7470       // Load with slow path based read barrier.
7471       // /* HeapReference<Object> */ out = *(obj + offset)
7472       __ movl(out_reg, Address(obj_reg, offset));
7473       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7474     }
7475   } else {
7476     // Plain load with no read barrier.
7477     // /* HeapReference<Object> */ out = *(obj + offset)
7478     __ movl(out_reg, Address(obj_reg, offset));
7479     __ MaybeUnpoisonHeapReference(out_reg);
7480   }
7481 }
7482 
7483 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7484     HInstruction* instruction,
7485     Location root,
7486     const Address& address,
7487     Label* fixup_label,
7488     ReadBarrierOption read_barrier_option) {
7489   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7490   if (read_barrier_option == kWithReadBarrier) {
7491     DCHECK(kEmitCompilerReadBarrier);
7492     if (kUseBakerReadBarrier) {
7493       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7494       // Baker's read barriers are used:
7495       //
7496       //   root = obj.field;
7497       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7498       //   if (temp != null) {
7499       //     root = temp(root)
7500       //   }
7501 
7502       // /* GcRoot<mirror::Object> */ root = *address
7503       __ movl(root_reg, address);
7504       if (fixup_label != nullptr) {
7505         __ Bind(fixup_label);
7506       }
7507       static_assert(
7508           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7509           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7510           "have different sizes.");
7511       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7512                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7513                     "have different sizes.");
7514 
7515       // Slow path marking the GC root `root`.
7516       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7517           instruction, root, /* unpoison_ref_before_marking= */ false);
7518       codegen_->AddSlowPath(slow_path);
7519 
7520       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7521       const int32_t entry_point_offset =
7522           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7523       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7524       // The entrypoint is null when the GC is not marking.
7525       __ j(kNotEqual, slow_path->GetEntryLabel());
7526       __ Bind(slow_path->GetExitLabel());
7527     } else {
7528       // GC root loaded through a slow path for read barriers other
7529       // than Baker's.
7530       // /* GcRoot<mirror::Object>* */ root = address
7531       __ leaq(root_reg, address);
7532       if (fixup_label != nullptr) {
7533         __ Bind(fixup_label);
7534       }
7535       // /* mirror::Object* */ root = root->Read()
7536       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7537     }
7538   } else {
7539     // Plain GC root load with no read barrier.
7540     // /* GcRoot<mirror::Object> */ root = *address
7541     __ movl(root_reg, address);
7542     if (fixup_label != nullptr) {
7543       __ Bind(fixup_label);
7544     }
7545     // Note that GC roots are not affected by heap poisoning, thus we
7546     // do not have to unpoison `root_reg` here.
7547   }
7548 }
7549 
7550 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7551                                                                 Location ref,
7552                                                                 CpuRegister obj,
7553                                                                 uint32_t offset,
7554                                                                 bool needs_null_check) {
7555   DCHECK(kEmitCompilerReadBarrier);
7556   DCHECK(kUseBakerReadBarrier);
7557 
7558   // /* HeapReference<Object> */ ref = *(obj + offset)
7559   Address src(obj, offset);
7560   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7561 }
7562 
7563 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7564                                                                 Location ref,
7565                                                                 CpuRegister obj,
7566                                                                 uint32_t data_offset,
7567                                                                 Location index,
7568                                                                 bool needs_null_check) {
7569   DCHECK(kEmitCompilerReadBarrier);
7570   DCHECK(kUseBakerReadBarrier);
7571 
7572   static_assert(
7573       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7574       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7575   // /* HeapReference<Object> */ ref =
7576   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7577   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7578   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7579 }
7580 
7581 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7582                                                                     Location ref,
7583                                                                     CpuRegister obj,
7584                                                                     const Address& src,
7585                                                                     bool needs_null_check,
7586                                                                     bool always_update_field,
7587                                                                     CpuRegister* temp1,
7588                                                                     CpuRegister* temp2) {
7589   DCHECK(kEmitCompilerReadBarrier);
7590   DCHECK(kUseBakerReadBarrier);
7591 
7592   // In slow path based read barriers, the read barrier call is
7593   // inserted after the original load. However, in fast path based
7594   // Baker's read barriers, we need to perform the load of
7595   // mirror::Object::monitor_ *before* the original reference load.
7596   // This load-load ordering is required by the read barrier.
7597   // The fast path/slow path (for Baker's algorithm) should look like:
7598   //
7599   //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
7600   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7601   //   HeapReference<Object> ref = *src;  // Original reference load.
7602   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7603   //   if (is_gray) {
7604   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7605   //   }
7606   //
7607   // Note: the original implementation in ReadBarrier::Barrier is
7608   // slightly more complex as:
7609   // - it implements the load-load fence using a data dependency on
7610   //   the high-bits of rb_state, which are expected to be all zeroes
7611   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7612   //   here, which is a no-op thanks to the x86-64 memory model);
7613   // - it performs additional checks that we do not do here for
7614   //   performance reasons.
7615 
7616   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7617   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7618 
7619   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7620   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7621   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7622   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7623   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7624   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
7625 
7626   // if (rb_state == ReadBarrier::GrayState())
7627   //   ref = ReadBarrier::Mark(ref);
7628   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7629   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7630   if (needs_null_check) {
7631     MaybeRecordImplicitNullCheck(instruction);
7632   }
7633 
7634   // Load fence to prevent load-load reordering.
7635   // Note that this is a no-op, thanks to the x86-64 memory model.
7636   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7637 
7638   // The actual reference load.
7639   // /* HeapReference<Object> */ ref = *src
7640   __ movl(ref_reg, src);  // Flags are unaffected.
7641 
7642   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7643   // Slow path marking the object `ref` when it is gray.
7644   SlowPathCode* slow_path;
7645   if (always_update_field) {
7646     DCHECK(temp1 != nullptr);
7647     DCHECK(temp2 != nullptr);
7648     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7649         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7650   } else {
7651     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7652         instruction, ref, /* unpoison_ref_before_marking= */ true);
7653   }
7654   AddSlowPath(slow_path);
7655 
7656   // We have done the "if" of the gray bit check above, now branch based on the flags.
7657   __ j(kNotZero, slow_path->GetEntryLabel());
7658 
7659   // Object* ref = ref_addr->AsMirrorPtr()
7660   __ MaybeUnpoisonHeapReference(ref_reg);
7661 
7662   __ Bind(slow_path->GetExitLabel());
7663 }
7664 
7665 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7666                                                   Location out,
7667                                                   Location ref,
7668                                                   Location obj,
7669                                                   uint32_t offset,
7670                                                   Location index) {
7671   DCHECK(kEmitCompilerReadBarrier);
7672 
7673   // Insert a slow path based read barrier *after* the reference load.
7674   //
7675   // If heap poisoning is enabled, the unpoisoning of the loaded
7676   // reference will be carried out by the runtime within the slow
7677   // path.
7678   //
7679   // Note that `ref` currently does not get unpoisoned (when heap
7680   // poisoning is enabled), which is alright as the `ref` argument is
7681   // not used by the artReadBarrierSlow entry point.
7682   //
7683   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7684   SlowPathCode* slow_path = new (GetScopedAllocator())
7685       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7686   AddSlowPath(slow_path);
7687 
7688   __ jmp(slow_path->GetEntryLabel());
7689   __ Bind(slow_path->GetExitLabel());
7690 }
7691 
7692 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7693                                                        Location out,
7694                                                        Location ref,
7695                                                        Location obj,
7696                                                        uint32_t offset,
7697                                                        Location index) {
7698   if (kEmitCompilerReadBarrier) {
7699     // Baker's read barriers shall be handled by the fast path
7700     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7701     DCHECK(!kUseBakerReadBarrier);
7702     // If heap poisoning is enabled, unpoisoning will be taken care of
7703     // by the runtime within the slow path.
7704     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7705   } else if (kPoisonHeapReferences) {
7706     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7707   }
7708 }
7709 
7710 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7711                                                          Location out,
7712                                                          Location root) {
7713   DCHECK(kEmitCompilerReadBarrier);
7714 
7715   // Insert a slow path based read barrier *after* the GC root load.
7716   //
7717   // Note that GC roots are not affected by heap poisoning, so we do
7718   // not need to do anything special for this here.
7719   SlowPathCode* slow_path =
7720       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7721   AddSlowPath(slow_path);
7722 
7723   __ jmp(slow_path->GetEntryLabel());
7724   __ Bind(slow_path->GetExitLabel());
7725 }
7726 
7727 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7728   // Nothing to do, this should be removed during prepare for register allocator.
7729   LOG(FATAL) << "Unreachable";
7730 }
7731 
7732 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7733   // Nothing to do, this should be removed during prepare for register allocator.
7734   LOG(FATAL) << "Unreachable";
7735 }
7736 
7737 // Simple implementation of packed switch - generate cascaded compare/jumps.
7738 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7739   LocationSummary* locations =
7740       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7741   locations->SetInAt(0, Location::RequiresRegister());
7742   locations->AddTemp(Location::RequiresRegister());
7743   locations->AddTemp(Location::RequiresRegister());
7744 }
7745 
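// HPackedSwitch code generation: small switches use a cascade of compare/branch pairs, larger
// ones use a jump table placed in the constant area. The table dispatch emitted at the end of
// this function is roughly the following sequence (a sketch, not a literal listing):
//   cmpl   value, num_entries - 1
//   ja     default_block
//   leaq   base, [rip + jump_table]
//   movsxd temp, dword ptr [base + value * 4]
//   addq   temp, base
//   jmp    temp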
7746 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7747   int32_t lower_bound = switch_instr->GetStartValue();
7748   uint32_t num_entries = switch_instr->GetNumEntries();
7749   LocationSummary* locations = switch_instr->GetLocations();
7750   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7751   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7752   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7753   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7754 
7755   // Should we generate smaller inline compare/jumps?
7756   if (num_entries <= kPackedSwitchJumpTableThreshold) {
7757     // Figure out the correct compare values and jump conditions.
7758     // Handle the first compare/branch as a special case because it might
7759     // jump to the default case.
7760     DCHECK_GT(num_entries, 2u);
7761     Condition first_condition;
7762     uint32_t index;
7763     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7764     if (lower_bound != 0) {
7765       first_condition = kLess;
7766       __ cmpl(value_reg_in, Immediate(lower_bound));
7767       __ j(first_condition, codegen_->GetLabelOf(default_block));
7768       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7769 
7770       index = 1;
7771     } else {
7772       // Handle all the compare/jumps below.
7773       first_condition = kBelow;
7774       index = 0;
7775     }
7776 
7777     // Handle the rest of the compare/jumps.
7778     for (; index + 1 < num_entries; index += 2) {
7779       int32_t compare_to_value = lower_bound + index + 1;
7780       __ cmpl(value_reg_in, Immediate(compare_to_value));
7781       // Jump to successors[index] if value < case_value[index].
7782       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7783       // Jump to successors[index + 1] if value == case_value[index + 1].
7784       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7785     }
7786 
7787     if (index != num_entries) {
7788       // There is an odd number of entries. Handle the last one.
7789       DCHECK_EQ(index + 1, num_entries);
7790       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7791       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7792     }
7793 
7794     // And the default for any other value.
7795     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7796       __ jmp(codegen_->GetLabelOf(default_block));
7797     }
7798     return;
7799   }
7800 
7801   // Remove the bias, if needed.
7802   Register value_reg_out = value_reg_in.AsRegister();
7803   if (lower_bound != 0) {
7804     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7805     value_reg_out = temp_reg.AsRegister();
7806   }
7807   CpuRegister value_reg(value_reg_out);
7808 
7809   // Is the value in range?
7810   __ cmpl(value_reg, Immediate(num_entries - 1));
7811   __ j(kAbove, codegen_->GetLabelOf(default_block));
7812 
7813   // We are in the range of the table.
7814   // Load the address of the jump table in the constant area.
7815   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7816 
7817   // Load the (signed) offset from the jump table.
7818   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7819 
7820   // Add the offset to the address of the table base.
7821   __ addq(temp_reg, base_reg);
7822 
7823   // And jump.
7824   __ jmp(temp_reg);
7825 }
7826 
7827 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7828                                                       ATTRIBUTE_UNUSED) {
7829   LOG(FATAL) << "Unreachable";
7830 }
7831 
7832 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7833                                                               ATTRIBUTE_UNUSED) {
7834   LOG(FATAL) << "Unreachable";
7835 }
7836 
7837 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7838   if (value == 0) {
7839     __ xorl(dest, dest);
7840   } else {
7841     __ movl(dest, Immediate(value));
7842   }
7843 }
7844 
7845 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7846   if (value == 0) {
7847     // Clears upper bits too.
7848     __ xorl(dest, dest);
7849   } else if (IsUint<32>(value)) {
7850     // We can use a 32 bit move, as it will zero-extend and is shorter.
7851     __ movl(dest, Immediate(static_cast<int32_t>(value)));
7852   } else {
7853     __ movq(dest, Immediate(value));
7854   }
7855 }
7856 
7857 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7858   if (value == 0) {
7859     __ xorps(dest, dest);
7860   } else {
7861     __ movss(dest, LiteralInt32Address(value));
7862   }
7863 }
7864 
7865 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7866   if (value == 0) {
7867     __ xorpd(dest, dest);
7868   } else {
7869     __ movsd(dest, LiteralInt64Address(value));
7870   }
7871 }
7872 
7873 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7874   Load32BitValue(dest, bit_cast<int32_t, float>(value));
7875 }
7876 
7877 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7878   Load64BitValue(dest, bit_cast<int64_t, double>(value));
7879 }
7880 
7881 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7882   if (value == 0) {
7883     __ testl(dest, dest);
7884   } else {
7885     __ cmpl(dest, Immediate(value));
7886   }
7887 }
7888 
7889 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7890   if (IsInt<32>(value)) {
7891     if (value == 0) {
7892       __ testq(dest, dest);
7893     } else {
7894       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7895     }
7896   } else {
7897     // Value won't fit in a 32-bit immediate.
7898     __ cmpq(dest, LiteralInt64Address(value));
7899   }
7900 }
7901 
7902 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7903   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7904   GenerateIntCompare(lhs_reg, rhs);
7905 }
7906 
7907 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7908   if (rhs.IsConstant()) {
7909     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7910     Compare32BitValue(lhs, value);
7911   } else if (rhs.IsStackSlot()) {
7912     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7913   } else {
7914     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7915   }
7916 }
7917 
7918 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7919   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7920   if (rhs.IsConstant()) {
7921     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7922     Compare64BitValue(lhs_reg, value);
7923   } else if (rhs.IsDoubleStackSlot()) {
7924     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7925   } else {
7926     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7927   }
7928 }
7929 
7930 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7931                                           Location index,
7932                                           ScaleFactor scale,
7933                                           uint32_t data_offset) {
7934   return index.IsConstant() ?
7935       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7936       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7937 }
7938 
7939 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7940   DCHECK(dest.IsDoubleStackSlot());
7941   if (IsInt<32>(value)) {
7942     // Can move directly as an int32 constant.
7943     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7944             Immediate(static_cast<int32_t>(value)));
7945   } else {
7946     Load64BitValue(CpuRegister(TMP), value);
7947     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7948   }
7949 }
7950 
7951 /**
7952  * Class to handle late fixup of offsets into the constant area.
7953  */
7954 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7955  public:
7956   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7957       : codegen_(&codegen), offset_into_constant_area_(offset) {}
7958 
7959  protected:
7960   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7961 
7962   CodeGeneratorX86_64* codegen_;
7963 
7964  private:
7965   void Process(const MemoryRegion& region, int pos) override {
7966     // Patch the correct offset for the instruction.  We use the address of the
7967     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7968     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7969     int32_t relative_position = constant_offset - pos;
7970 
7971     // Patch in the right value.
7972     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7973   }
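
  // Worked example with made-up numbers: if ConstantAreaStart() == 0x100, the literal sits
  // 8 bytes into the area, and pos == 0x40, then relative_position == 0x108 - 0x40 == 0xC8;
  // the CPU adds that displacement to the address of the next instruction (which corresponds
  // to pos), so execution reaches the literal at offset 0x108.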
7974 
7975   // Location in constant area that the fixup refers to.
7976   size_t offset_into_constant_area_;
7977 };
7978 
7979 /**
7980  * Class to handle late fixup of offsets to a jump table that will be created in the
7981  * constant area.
7982  */
7983 class JumpTableRIPFixup : public RIPFixup {
7984  public:
7985   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7986       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7987 
7988   void CreateJumpTable() {
7989     X86_64Assembler* assembler = codegen_->GetAssembler();
7990 
7991     // Ensure that the reference to the jump table has the correct offset.
7992     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7993     SetOffset(offset_in_constant_table);
7994 
7995     // Compute the offset from the start of the function to this jump table.
7996     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7997 
7998     // Populate the jump table with the correct values for the jump table.
7999     int32_t num_entries = switch_instr_->GetNumEntries();
8000     HBasicBlock* block = switch_instr_->GetBlock();
8001     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8002     // The value that we want is the target offset - the position of the table.
8003     for (int32_t i = 0; i < num_entries; i++) {
8004       HBasicBlock* b = successors[i];
8005       Label* l = codegen_->GetLabelOf(b);
8006       DCHECK(l->IsBound());
8007       int32_t offset_to_block = l->Position() - current_table_offset;
8008       assembler->AppendInt32(offset_to_block);
8009     }
8010   }
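
  // Each appended entry is a 4-byte offset of the target block relative to the start of the
  // table itself; the packed-switch dispatch adds the table base back (movsxd + addq) before
  // jumping, so the table contents are position-independent.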
8011 
8012  private:
8013   const HPackedSwitch* switch_instr_;
8014 };
8015 
8016 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
8017   // Generate the constant area if needed.
8018   X86_64Assembler* assembler = GetAssembler();
8019   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8020     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8021     assembler->Align(4, 0);
8022     constant_area_start_ = assembler->CodeSize();
8023 
8024     // Populate any jump tables.
8025     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8026       jump_table->CreateJumpTable();
8027     }
8028 
8029     // And now add the constant area to the generated code.
8030     assembler->AddConstantArea();
8031   }
8032 
8033   // And finish up.
8034   CodeGenerator::Finalize(allocator);
8035 }
8036 
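// The Literal*Address helpers below return RIP-relative addresses whose 32-bit displacement is
// patched via a RIPFixup once the constant area has been emitted in Finalize(); for example,
// the 64-bit bitwise path above uses LiteralInt64Address for immediates that do not fit in an
// int32.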
8037 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8038   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8039   return Address::RIP(fixup);
8040 }
8041 
8042 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8043   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8044   return Address::RIP(fixup);
8045 }
8046 
8047 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8048   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8049   return Address::RIP(fixup);
8050 }
8051 
8052 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8053   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8054   return Address::RIP(fixup);
8055 }
8056 
8057 // TODO: trg as memory.
8058 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8059   if (!trg.IsValid()) {
8060     DCHECK_EQ(type, DataType::Type::kVoid);
8061     return;
8062   }
8063 
8064   DCHECK_NE(type, DataType::Type::kVoid);
8065 
8066   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8067   if (trg.Equals(return_loc)) {
8068     return;
8069   }
8070 
8071   // Let the parallel move resolver take care of all of this.
8072   HParallelMove parallel_move(GetGraph()->GetAllocator());
8073   parallel_move.AddMove(return_loc, trg, type, nullptr);
8074   GetMoveResolver()->EmitNativeCode(&parallel_move);
8075 }
8076 
8077 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8078   // Create a fixup to be used to create and address the jump table.
8079   JumpTableRIPFixup* table_fixup =
8080       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8081 
8082   // We have to populate the jump tables.
8083   fixups_to_jump_tables_.push_back(table_fixup);
8084   return Address::RIP(table_fixup);
8085 }
8086 
8087 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8088                                              const Address& addr_high,
8089                                              int64_t v,
8090                                              HInstruction* instruction) {
8091   if (IsInt<32>(v)) {
8092     int32_t v_32 = v;
8093     __ movq(addr_low, Immediate(v_32));
8094     MaybeRecordImplicitNullCheck(instruction);
8095   } else {
8096     // Doesn't fit in a 32-bit immediate.  Do it in two 32-bit pieces.
8097     int32_t low_v = Low32Bits(v);
8098     int32_t high_v = High32Bits(v);
8099     __ movl(addr_low, Immediate(low_v));
8100     MaybeRecordImplicitNullCheck(instruction);
8101     __ movl(addr_high, Immediate(high_v));
8102   }
8103 }
8104 
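// JIT root patching: each recorded Label identifies a root load in the generated code; the
// 32-bit literal of that instruction is rewritten in place with the address of the matching
// entry in the JIT roots table (hence the dchecked 64-to-32-bit cast below).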
8105 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8106                                           const uint8_t* roots_data,
8107                                           const PatchInfo<Label>& info,
8108                                           uint64_t index_in_table) const {
8109   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8110   uintptr_t address =
8111       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8112   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8113   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8114      dchecked_integral_cast<uint32_t>(address);
8115 }
8116 
8117 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8118   for (const PatchInfo<Label>& info : jit_string_patches_) {
8119     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8120     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8121     PatchJitRootUse(code, roots_data, info, index_in_table);
8122   }
8123 
8124   for (const PatchInfo<Label>& info : jit_class_patches_) {
8125     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8126     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8127     PatchJitRootUse(code, roots_data, info, index_in_table);
8128   }
8129 }
8130 
8131 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8132   return codegen_->GetInstructionSetFeatures().HasAVX();
8133 }
8134 
8135 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8136   return codegen_->GetInstructionSetFeatures().HasAVX2();
8137 }
8138 
8139 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8140   return codegen_->GetInstructionSetFeatures().HasAVX();
8141 }
8142 
8143 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8144   return codegen_->GetInstructionSetFeatures().HasAVX2();
8145 }
8146 
8147 #undef __
8148 
8149 }  // namespace x86_64
8150 }  // namespace art
8151