1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_list.h"
31 #include "intrinsics_utils.h"
32 #include "intrinsics_x86_64.h"
33 #include "jit/profiling_info.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "mirror/method_type.h"
39 #include "mirror/object_reference.h"
40 #include "mirror/var_handle.h"
41 #include "optimizing/nodes.h"
42 #include "profiling_info_builder.h"
43 #include "scoped_thread_state_change-inl.h"
44 #include "thread.h"
45 #include "trace.h"
46 #include "utils/assembler.h"
47 #include "utils/stack_checks.h"
48 #include "utils/x86_64/assembler_x86_64.h"
49 #include "utils/x86_64/constants_x86_64.h"
50 #include "utils/x86_64/managed_register_x86_64.h"
51 
52 namespace art HIDDEN {
53 
54 template<class MirrorType>
55 class GcRoot;
56 
57 namespace x86_64 {
58 
59 static constexpr int kCurrentMethodStackOffset = 0;
60 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
61 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
62 // generate less code/data with a small num_entries.
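// (Illustrative arithmetic from the estimates above: at num_entries == 5, the compare/jump
// sequence costs roughly 1.5 * 5 ≈ 8 instructions, versus 7 instructions plus 5 literals for
// a jump table, which is roughly where the threshold below is drawn.)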
63 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
64 
65 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
66 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
67 
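// 0x400 selects the C2 condition flag (bit 10) of the x87 FPU status word.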
68 static constexpr int kC2ConditionMask = 0x400;
69 
70 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
71   // Custom calling convention: RAX serves as both input and output.
72   RegisterSet caller_saves = RegisterSet::Empty();
73   caller_saves.Add(Location::RegisterLocation(RAX));
74   return caller_saves;
75 }
76 
77 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
78 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
79 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
80 
81 class NullCheckSlowPathX86_64 : public SlowPathCode {
82  public:
83   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
84 
85   void EmitNativeCode(CodeGenerator* codegen) override {
86     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
87     __ Bind(GetEntryLabel());
88     if (instruction_->CanThrowIntoCatchBlock()) {
89       // Live registers will be restored in the catch block if caught.
90       SaveLiveRegisters(codegen, instruction_->GetLocations());
91     }
92     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, this);
93     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
94   }
95 
96   bool IsFatal() const override { return true; }
97 
98   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
99 
100  private:
101   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
102 };
103 
104 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
105  public:
106   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
107 
108   void EmitNativeCode(CodeGenerator* codegen) override {
109     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
110     __ Bind(GetEntryLabel());
111     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, this);
112     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
113   }
114 
115   bool IsFatal() const override { return true; }
116 
117   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
118 
119  private:
120   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
121 };
122 
123 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
124  public:
125   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
126       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
127 
128   void EmitNativeCode(CodeGenerator* codegen) override {
129     __ Bind(GetEntryLabel());
130     if (type_ == DataType::Type::kInt32) {
131       if (is_div_) {
132         __ negl(cpu_reg_);
133       } else {
134         __ xorl(cpu_reg_, cpu_reg_);
135       }
136 
137     } else {
138       DCHECK_EQ(DataType::Type::kInt64, type_);
139       if (is_div_) {
140         __ negq(cpu_reg_);
141       } else {
142         __ xorl(cpu_reg_, cpu_reg_);
143       }
144     }
145     __ jmp(GetExitLabel());
146   }
147 
148   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
149 
150  private:
151   const CpuRegister cpu_reg_;
152   const DataType::Type type_;
153   const bool is_div_;
154   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
155 };
156 
157 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
158  public:
159   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
160       : SlowPathCode(instruction), successor_(successor) {}
161 
162   void EmitNativeCode(CodeGenerator* codegen) override {
163     LocationSummary* locations = instruction_->GetLocations();
164     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
165     __ Bind(GetEntryLabel());
166     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
167     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, this);
168     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
169     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
170     if (successor_ == nullptr) {
171       __ jmp(GetReturnLabel());
172     } else {
173       __ jmp(x86_64_codegen->GetLabelOf(successor_));
174     }
175   }
176 
177   Label* GetReturnLabel() {
178     DCHECK(successor_ == nullptr);
179     return &return_label_;
180   }
181 
182   HBasicBlock* GetSuccessor() const {
183     return successor_;
184   }
185 
186   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
187 
188  private:
189   HBasicBlock* const successor_;
190   Label return_label_;
191 
192   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
193 };
194 
195 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
196  public:
197   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
198     : SlowPathCode(instruction) {}
199 
200   void EmitNativeCode(CodeGenerator* codegen) override {
201     LocationSummary* locations = instruction_->GetLocations();
202     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
203     __ Bind(GetEntryLabel());
204     if (instruction_->CanThrowIntoCatchBlock()) {
205       // Live registers will be restored in the catch block if caught.
206       SaveLiveRegisters(codegen, locations);
207     }
208 
209     Location index_loc = locations->InAt(0);
210     Location length_loc = locations->InAt(1);
211     InvokeRuntimeCallingConvention calling_convention;
212     Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
213     Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
214 
215     // Are we using an array length from memory?
216     if (!length_loc.IsValid()) {
217       DCHECK(instruction_->InputAt(1)->IsArrayLength());
218       HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
219       DCHECK(array_length->IsEmittedAtUseSite());
220       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
221       Location array_loc = array_length->GetLocations()->InAt(0);
222       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
223       if (!index_loc.Equals(length_arg)) {
224         // The index is not clobbered by loading the length directly to `length_arg`.
225         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
226         x86_64_codegen->Move(index_arg, index_loc);
227       } else if (!array_loc.Equals(index_arg)) {
228         // The array reference is not clobbered by the index move.
229         x86_64_codegen->Move(index_arg, index_loc);
230         __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
231       } else {
232         // Load the array length into `TMP`.
233         DCHECK(codegen->IsBlockedCoreRegister(TMP));
234         __ movl(CpuRegister(TMP), array_len);
235         // Single move to CPU register does not clobber `TMP`.
236         x86_64_codegen->Move(index_arg, index_loc);
237         __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
238       }
239       if (mirror::kUseStringCompression && array_length->IsStringLength()) {
240         __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
241       }
242     } else {
243       // We're moving two locations to locations that could overlap,
244       // so we need a parallel move resolver.
245       codegen->EmitParallelMoves(
246           index_loc,
247           index_arg,
248           DataType::Type::kInt32,
249           length_loc,
250           length_arg,
251           DataType::Type::kInt32);
252     }
253 
254     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
255         ? kQuickThrowStringBounds
256         : kQuickThrowArrayBounds;
257     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, this);
258     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
259     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
260   }
261 
262   bool IsFatal() const override { return true; }
263 
264   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
265 
266  private:
267   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
268 };
269 
270 class LoadMethodTypeSlowPathX86_64: public SlowPathCode {
271  public:
272   explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
273 
274   void EmitNativeCode(CodeGenerator* codegen) override {
275     LocationSummary* locations = instruction_->GetLocations();
276     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
277 
278     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
279     __ Bind(GetEntryLabel());
280     SaveLiveRegisters(codegen, locations);
281 
282     const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
283     // Custom calling convention: RAX serves as both input and output.
284     __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
285     x86_64_codegen->InvokeRuntime(kQuickResolveMethodType, instruction_, this);
286     CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
287     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
288     RestoreLiveRegisters(codegen, locations);
289 
290     __ jmp(GetExitLabel());
291   }
292 
293   const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
294 
295  private:
296   DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
297 };
298 
299 class LoadClassSlowPathX86_64 : public SlowPathCode {
300  public:
301   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
302       : SlowPathCode(at), cls_(cls) {
303     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
304     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
305   }
306 
307   void EmitNativeCode(CodeGenerator* codegen) override {
308     LocationSummary* locations = instruction_->GetLocations();
309     Location out = locations->Out();
310     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
311     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
312 
313     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
314     __ Bind(GetEntryLabel());
315     SaveLiveRegisters(codegen, locations);
316 
317     // Custom calling convention: RAX serves as both input and output.
318     if (must_resolve_type) {
319       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
320              x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
321              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
322                              &cls_->GetDexFile()));
323       dex::TypeIndex type_index = cls_->GetTypeIndex();
324       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
325       if (cls_->NeedsAccessCheck()) {
326         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
327         x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, this);
328       } else {
329         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
330         x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, this);
331       }
332       // If we also must_do_clinit, the resolved type is now in the correct register.
333     } else {
334       DCHECK(must_do_clinit);
335       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
336       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
337     }
338     if (must_do_clinit) {
339       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, this);
340       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
341     }
342 
343     // Move the class to the desired location.
344     if (out.IsValid()) {
345       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
346       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
347     }
348 
349     RestoreLiveRegisters(codegen, locations);
350     __ jmp(GetExitLabel());
351   }
352 
353   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
354 
355  private:
356   // The class this slow path will load.
357   HLoadClass* const cls_;
358 
359   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
360 };
361 
362 class LoadStringSlowPathX86_64 : public SlowPathCode {
363  public:
364   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
365 
366   void EmitNativeCode(CodeGenerator* codegen) override {
367     LocationSummary* locations = instruction_->GetLocations();
368     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
369 
370     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
371     __ Bind(GetEntryLabel());
372     SaveLiveRegisters(codegen, locations);
373 
374     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
375     // Custom calling convention: RAX serves as both input and output.
376     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
377     x86_64_codegen->InvokeRuntime(kQuickResolveString, instruction_, this);
378     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
379     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
380     RestoreLiveRegisters(codegen, locations);
381 
382     __ jmp(GetExitLabel());
383   }
384 
385   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
386 
387  private:
388   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
389 };
390 
391 class TypeCheckSlowPathX86_64 : public SlowPathCode {
392  public:
393   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
394       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
395 
396   void EmitNativeCode(CodeGenerator* codegen) override {
397     LocationSummary* locations = instruction_->GetLocations();
398     DCHECK(instruction_->IsCheckCast()
399            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
400 
401     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
402     __ Bind(GetEntryLabel());
403 
404     if (kPoisonHeapReferences &&
405         instruction_->IsCheckCast() &&
406         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
407       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
408       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
409     }
410 
411     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
412       SaveLiveRegisters(codegen, locations);
413     }
414 
415     // We're moving two locations to locations that could overlap, so we need a parallel
416     // move resolver.
417     InvokeRuntimeCallingConvention calling_convention;
418     codegen->EmitParallelMoves(locations->InAt(0),
419                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
420                                DataType::Type::kReference,
421                                locations->InAt(1),
422                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
423                                DataType::Type::kReference);
424     if (instruction_->IsInstanceOf()) {
425       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, this);
426       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
427     } else {
428       DCHECK(instruction_->IsCheckCast());
429       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, this);
430       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
431     }
432 
433     if (!is_fatal_) {
434       if (instruction_->IsInstanceOf()) {
435         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
436       }
437 
438       RestoreLiveRegisters(codegen, locations);
439       __ jmp(GetExitLabel());
440     }
441   }
442 
443   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
444 
445   bool IsFatal() const override { return is_fatal_; }
446 
447  private:
448   const bool is_fatal_;
449 
450   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
451 };
452 
453 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
454  public:
455   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
456       : SlowPathCode(instruction) {}
457 
458   void EmitNativeCode(CodeGenerator* codegen) override {
459     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
460     __ Bind(GetEntryLabel());
461     LocationSummary* locations = instruction_->GetLocations();
462     SaveLiveRegisters(codegen, locations);
463     InvokeRuntimeCallingConvention calling_convention;
464     x86_64_codegen->Load32BitValue(
465         CpuRegister(calling_convention.GetRegisterAt(0)),
466         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
467     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, this);
468     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
469   }
470 
471   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
472 
473  private:
474   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
475 };
476 
477 class ArraySetSlowPathX86_64 : public SlowPathCode {
478  public:
479   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
480 
481   void EmitNativeCode(CodeGenerator* codegen) override {
482     LocationSummary* locations = instruction_->GetLocations();
483     __ Bind(GetEntryLabel());
484     SaveLiveRegisters(codegen, locations);
485 
486     InvokeRuntimeCallingConvention calling_convention;
487     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
488     parallel_move.AddMove(
489         locations->InAt(0),
490         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
491         DataType::Type::kReference,
492         nullptr);
493     parallel_move.AddMove(
494         locations->InAt(1),
495         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
496         DataType::Type::kInt32,
497         nullptr);
498     parallel_move.AddMove(
499         locations->InAt(2),
500         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
501         DataType::Type::kReference,
502         nullptr);
503     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
504 
505     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
506     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, this);
507     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
508     RestoreLiveRegisters(codegen, locations);
509     __ jmp(GetExitLabel());
510   }
511 
512   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
513 
514  private:
515   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
516 };
517 
518 // Slow path marking an object reference `ref` during a read
519 // barrier. The field `obj.field` in the object `obj` holding this
520 // reference does not get updated by this slow path after marking (see
521 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
522 //
523 // This means that after the execution of this slow path, `ref` will
524 // always be up-to-date, but `obj.field` may not; i.e., after the
525 // flip, `ref` will be a to-space reference, but `obj.field` will
526 // probably still be a from-space reference (unless it gets updated by
527 // another thread, or if another thread installed another object
528 // reference (different from `ref`) in `obj.field`).
529 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
530  public:
531   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
532                                 Location ref,
533                                 bool unpoison_ref_before_marking)
534       : SlowPathCode(instruction),
535         ref_(ref),
536         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
537   }
538 
539   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
540 
541   void EmitNativeCode(CodeGenerator* codegen) override {
542     DCHECK(codegen->EmitReadBarrier());
543     LocationSummary* locations = instruction_->GetLocations();
544     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
545     Register ref_reg = ref_cpu_reg.AsRegister();
546     DCHECK(locations->CanCall());
547     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
548     DCHECK(instruction_->IsInstanceFieldGet() ||
549            instruction_->IsStaticFieldGet() ||
550            instruction_->IsArrayGet() ||
551            instruction_->IsArraySet() ||
552            instruction_->IsLoadClass() ||
553            instruction_->IsLoadMethodType() ||
554            instruction_->IsLoadString() ||
555            instruction_->IsInstanceOf() ||
556            instruction_->IsCheckCast() ||
557            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
558         << "Unexpected instruction in read barrier marking slow path: "
559         << instruction_->DebugName();
560 
561     __ Bind(GetEntryLabel());
562     if (unpoison_ref_before_marking_) {
563       // Object* ref = ref_addr->AsMirrorPtr()
564       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
565     }
566     // No need to save live registers; it's taken care of by the
567     // entrypoint. Also, there is no need to update the stack mask,
568     // as this runtime call will not trigger a garbage collection.
569     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
570     DCHECK_NE(ref_reg, RSP);
571     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
572     // "Compact" slow path, saving two moves.
573     //
574     // Instead of using the standard runtime calling convention (input
575     // and output in R0):
576     //
577     //   RDI <- ref
578     //   RAX <- ReadBarrierMark(RDI)
579     //   ref <- RAX
580     //
581     // we just use rX (the register containing `ref`) as input and output
582     // of a dedicated entrypoint:
583     //
584     //   rX <- ReadBarrierMarkRegX(rX)
585     //
586     int32_t entry_point_offset =
587         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
588     // This runtime call does not require a stack map.
589     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
590     __ jmp(GetExitLabel());
591   }
592 
593  private:
594   // The location (register) of the marked object reference.
595   const Location ref_;
596   // Should the reference in `ref_` be unpoisoned prior to marking it?
597   const bool unpoison_ref_before_marking_;
598 
599   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
600 };
601 
602 // Slow path marking an object reference `ref` during a read barrier,
603 // and if needed, atomically updating the field `obj.field` in the
604 // object `obj` holding this reference after marking (contrary to
605 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
606 // `obj.field`).
607 //
608 // This means that after the execution of this slow path, both `ref`
609 // and `obj.field` will be up-to-date; i.e., after the flip, both will
610 // hold the same to-space reference (unless another thread installed
611 // another object reference (different from `ref`) in `obj.field`).
612 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
613  public:
614   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
615                                               Location ref,
616                                               CpuRegister obj,
617                                               const Address& field_addr,
618                                               bool unpoison_ref_before_marking,
619                                               CpuRegister temp1,
620                                               CpuRegister temp2)
621       : SlowPathCode(instruction),
622         ref_(ref),
623         obj_(obj),
624         field_addr_(field_addr),
625         unpoison_ref_before_marking_(unpoison_ref_before_marking),
626         temp1_(temp1),
627         temp2_(temp2) {
628   }
629 
630   const char* GetDescription() const override {
631     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
632   }
633 
634   void EmitNativeCode(CodeGenerator* codegen) override {
635     DCHECK(codegen->EmitReadBarrier());
636     LocationSummary* locations = instruction_->GetLocations();
637     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
638     Register ref_reg = ref_cpu_reg.AsRegister();
639     DCHECK(locations->CanCall());
640     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
641     DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
642         << "Unexpected instruction in read barrier marking and field updating slow path: "
643         << instruction_->DebugName();
644     HInvoke* invoke = instruction_->AsInvoke();
645     DCHECK(IsUnsafeCASReference(invoke) ||
646            IsUnsafeGetAndSetReference(invoke) ||
647            IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
648 
649     __ Bind(GetEntryLabel());
650     if (unpoison_ref_before_marking_) {
651       // Object* ref = ref_addr->AsMirrorPtr()
652       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
653     }
654 
655     // Save the old (unpoisoned) reference.
656     __ movl(temp1_, ref_cpu_reg);
657 
658     // No need to save live registers; it's taken care of by the
659     // entrypoint. Also, there is no need to update the stack mask,
660     // as this runtime call will not trigger a garbage collection.
661     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
662     DCHECK_NE(ref_reg, RSP);
663     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
664     // "Compact" slow path, saving two moves.
665     //
666     // Instead of using the standard runtime calling convention (input
667     // and output in R0):
668     //
669     //   RDI <- ref
670     //   RAX <- ReadBarrierMark(RDI)
671     //   ref <- RAX
672     //
673     // we just use rX (the register containing `ref`) as input and output
674     // of a dedicated entrypoint:
675     //
676     //   rX <- ReadBarrierMarkRegX(rX)
677     //
678     int32_t entry_point_offset =
679         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
680     // This runtime call does not require a stack map.
681     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
682 
683     // If the new reference is different from the old reference,
684     // update the field in the holder (`*field_addr`).
685     //
686     // Note that this field could also hold a different object, if
687     // another thread had concurrently changed it. In that case, the
688     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
689     // operation below would abort the CAS, leaving the field as-is.
690     NearLabel done;
691     __ cmpl(temp1_, ref_cpu_reg);
692     __ j(kEqual, &done);
693 
694     // Update the holder's field atomically.  This may fail if
695     // the mutator updates before us, but it's OK.  This is achieved
696     // using a strong compare-and-set (CAS) operation with relaxed
697     // memory synchronization ordering, where the expected value is
698     // the old reference and the desired value is the new reference.
699     // This operation is implemented with a 32-bit LOCK CMPXCHGL
700     // instruction, which requires the expected value (the old
701     // reference) to be in EAX.  Save RAX beforehand, and move the
702     // expected value (stored in `temp1_`) into EAX.
703     __ movq(temp2_, CpuRegister(RAX));
704     __ movl(CpuRegister(RAX), temp1_);
705 
706     // Convenience aliases.
707     CpuRegister base = obj_;
708     CpuRegister expected = CpuRegister(RAX);
709     CpuRegister value = ref_cpu_reg;
710 
711     bool base_equals_value = (base.AsRegister() == value.AsRegister());
712     Register value_reg = ref_reg;
713     if (kPoisonHeapReferences) {
714       if (base_equals_value) {
715         // If `base` and `value` are the same register location, move
716         // `value_reg` to a temporary register.  This way, poisoning
717         // `value_reg` won't invalidate `base`.
718         value_reg = temp1_.AsRegister();
719         __ movl(CpuRegister(value_reg), base);
720       }
721 
722       // Check that the register allocator did not assign the location
723       // of `expected` (RAX) to `value` nor to `base`, so that heap
724       // poisoning (when enabled) works as intended below.
725       // - If `value` were equal to `expected`, both references would
726       //   be poisoned twice, meaning they would not be poisoned at
727       //   all, as heap poisoning uses address negation.
728       // - If `base` were equal to `expected`, poisoning `expected`
729       //   would invalidate `base`.
730       DCHECK_NE(value_reg, expected.AsRegister());
731       DCHECK_NE(base.AsRegister(), expected.AsRegister());
732 
733       __ PoisonHeapReference(expected);
734       __ PoisonHeapReference(CpuRegister(value_reg));
735     }
736 
737     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
738 
739     // If heap poisoning is enabled, we need to unpoison the values
740     // that were poisoned earlier.
741     if (kPoisonHeapReferences) {
742       if (base_equals_value) {
743         // `value_reg` has been moved to a temporary register, no need
744         // to unpoison it.
745       } else {
746         __ UnpoisonHeapReference(CpuRegister(value_reg));
747       }
748       // No need to unpoison `expected` (RAX), as it will be overwritten below.
749     }
750 
751     // Restore RAX.
752     __ movq(CpuRegister(RAX), temp2_);
753 
754     __ Bind(&done);
755     __ jmp(GetExitLabel());
756   }
757 
758  private:
759   // The location (register) of the marked object reference.
760   const Location ref_;
761   // The register containing the object holding the marked object reference field.
762   const CpuRegister obj_;
763   // The address of the marked reference field.  The base of this address must be `obj_`.
764   const Address field_addr_;
765 
766   // Should the reference in `ref_` be unpoisoned prior to marking it?
767   const bool unpoison_ref_before_marking_;
768 
769   const CpuRegister temp1_;
770   const CpuRegister temp2_;
771 
772   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
773 };
774 
775 // Slow path generating a read barrier for a heap reference.
776 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
777  public:
778   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
779                                             Location out,
780                                             Location ref,
781                                             Location obj,
782                                             uint32_t offset,
783                                             Location index)
784       : SlowPathCode(instruction),
785         out_(out),
786         ref_(ref),
787         obj_(obj),
788         offset_(offset),
789         index_(index) {
790     // If `obj` is equal to `out` or `ref`, it means the initial
791     // object has been overwritten by (or after) the heap object
792     // reference load to be instrumented, e.g.:
793     //
794     //   __ movl(out, Address(out, offset));
795     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
796     //
797     // In that case, we have lost the information about the original
798     // object, and the emitted read barrier cannot work properly.
799     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
800     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
801 }
802 
803   void EmitNativeCode(CodeGenerator* codegen) override {
804     DCHECK(codegen->EmitReadBarrier());
805     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
806     LocationSummary* locations = instruction_->GetLocations();
807     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
808     DCHECK(locations->CanCall());
809     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
810     DCHECK(instruction_->IsInstanceFieldGet() ||
811            instruction_->IsStaticFieldGet() ||
812            instruction_->IsArrayGet() ||
813            instruction_->IsInstanceOf() ||
814            instruction_->IsCheckCast() ||
815            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
816         << "Unexpected instruction in read barrier for heap reference slow path: "
817         << instruction_->DebugName();
818 
819     __ Bind(GetEntryLabel());
820     SaveLiveRegisters(codegen, locations);
821 
822     // We may have to change the index's value, but as `index_` is a
823     // constant member (like other "inputs" of this slow path),
824     // introduce a copy of it, `index`.
825     Location index = index_;
826     if (index_.IsValid()) {
827       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
828       if (instruction_->IsArrayGet()) {
829         // Compute real offset and store it in index_.
830         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
831         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
832         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
833           // We are about to change the value of `index_reg` (see the
834           // calls to art::x86_64::X86_64Assembler::shll and
835           // art::x86_64::X86_64Assembler::AddImmediate below), but it
836           // has not been saved by the previous call to
837           // art::SlowPathCode::SaveLiveRegisters, as it is a
838           // callee-save register --
839           // art::SlowPathCode::SaveLiveRegisters does not consider
840           // callee-save registers, as it has been designed with the
841           // assumption that callee-save registers are supposed to be
842           // handled by the called function.  So, as a callee-save
843           // register, `index_reg` _would_ eventually be saved onto
844           // the stack, but it would be too late: we would have
845           // changed its value earlier.  Therefore, we manually save
846           // it here into another freely available register,
847           // `free_reg`, chosen of course among the caller-save
848           // registers (as a callee-save `free_reg` register would
849           // exhibit the same problem).
850           //
851           // Note we could have requested a temporary register from
852           // the register allocator instead; but we prefer not to, as
853           // this is a slow path, and we know we can find a
854           // caller-save register that is available.
855           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
856           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
857           index_reg = free_reg;
858           index = Location::RegisterLocation(index_reg);
859         } else {
860           // The initial register stored in `index_` has already been
861           // saved in the call to art::SlowPathCode::SaveLiveRegisters
862           // (as it is not a callee-save register), so we can freely
863           // use it.
864         }
865         // Shifting the index value contained in `index_reg` by the
866         // scale factor (2) cannot overflow in practice, as the
867         // runtime is unable to allocate object arrays with a size
868         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
869         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
870         static_assert(
871             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
872             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
873         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
874       } else {
875         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
876         // intrinsics, `index_` is not shifted by a scale factor of 2
877         // (as in the case of ArrayGet), as it is actually an offset
878         // to an object field within an object.
879         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
880         DCHECK(instruction_->GetLocations()->Intrinsified());
881         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
882                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
883                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
884                (instruction_->AsInvoke()->GetIntrinsic() ==
885                     Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
886                (instruction_->AsInvoke()->GetIntrinsic() ==
887                     Intrinsics::kJdkUnsafeGetReferenceAcquire))
888             << instruction_->AsInvoke()->GetIntrinsic();
889         DCHECK_EQ(offset_, 0U);
890         DCHECK(index_.IsRegister());
891       }
892     }
893 
894     // We're moving two or three locations to locations that could
895     // overlap, so we need a parallel move resolver.
896     InvokeRuntimeCallingConvention calling_convention;
897     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
898     parallel_move.AddMove(ref_,
899                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
900                           DataType::Type::kReference,
901                           nullptr);
902     parallel_move.AddMove(obj_,
903                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
904                           DataType::Type::kReference,
905                           nullptr);
906     if (index.IsValid()) {
907       parallel_move.AddMove(index,
908                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
909                             DataType::Type::kInt32,
910                             nullptr);
911       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
912     } else {
913       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
914       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
915     }
916     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, this);
917     CheckEntrypointTypes<
918         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
919     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
920 
921     RestoreLiveRegisters(codegen, locations);
922     __ jmp(GetExitLabel());
923   }
924 
925   const char* GetDescription() const override {
926     return "ReadBarrierForHeapReferenceSlowPathX86_64";
927   }
928 
929  private:
930   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
931     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
932     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
933     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
934       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
935         return static_cast<CpuRegister>(i);
936       }
937     }
938     // We shall never fail to find a free caller-save register, as
939     // there are more than two core caller-save registers on x86-64
940     // (meaning it is possible to find one which is different from
941     // `ref` and `obj`).
942     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
943     LOG(FATAL) << "Could not find a free caller-save register";
944     UNREACHABLE();
945   }
946 
947   const Location out_;
948   const Location ref_;
949   const Location obj_;
950   const uint32_t offset_;
951   // An additional location containing an index to an array.
952   // Only used for HArrayGet and the UnsafeGetObject &
953   // UnsafeGetObjectVolatile intrinsics.
954   const Location index_;
955 
956   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
957 };
958 
959 // Slow path generating a read barrier for a GC root.
960 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
961  public:
962   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
963       : SlowPathCode(instruction), out_(out), root_(root) {
964   }
965 
966   void EmitNativeCode(CodeGenerator* codegen) override {
967     DCHECK(codegen->EmitReadBarrier());
968     LocationSummary* locations = instruction_->GetLocations();
969     DCHECK(locations->CanCall());
970     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
971     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
972         << "Unexpected instruction in read barrier for GC root slow path: "
973         << instruction_->DebugName();
974 
975     __ Bind(GetEntryLabel());
976     SaveLiveRegisters(codegen, locations);
977 
978     InvokeRuntimeCallingConvention calling_convention;
979     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
980     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
981     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, this);
982     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
983     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
984 
985     RestoreLiveRegisters(codegen, locations);
986     __ jmp(GetExitLabel());
987   }
988 
989   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
990 
991  private:
992   const Location out_;
993   const Location root_;
994 
995   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
996 };
997 
998 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
999  public:
1000   explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
1001       : SlowPathCode(instruction) {}
1002 
1003   void EmitNativeCode(CodeGenerator* codegen) override {
1004     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1005     LocationSummary* locations = instruction_->GetLocations();
1006     QuickEntrypointEnum entry_point =
1007         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
1008     __ Bind(GetEntryLabel());
1009     SaveLiveRegisters(codegen, locations);
1010     if (instruction_->IsMethodExitHook()) {
1011       // Load FrameSize to pass to the exit hook.
1012       __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
1013     }
1014     x86_64_codegen->InvokeRuntime(entry_point, instruction_, this);
1015     RestoreLiveRegisters(codegen, locations);
1016     __ jmp(GetExitLabel());
1017   }
1018 
1019   const char* GetDescription() const override {
1020     return "MethodEntryExitHooksSlowPath";
1021   }
1022 
1023  private:
1024   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1025 };
1026 
1027 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1028  public:
1029   CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
1030       : SlowPathCode(suspend_check),
1031         counter_address_(counter_address) {}
1032 
1033   void EmitNativeCode(CodeGenerator* codegen) override {
1034     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1035     __ Bind(GetEntryLabel());
1036     __ movq(CpuRegister(TMP), Immediate(counter_address_));
1037     __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
1038     if (instruction_ != nullptr) {
1039       // Only saves full width XMM for SIMD.
1040       SaveLiveRegisters(codegen, instruction_->GetLocations());
1041     }
1042     x86_64_codegen->GenerateInvokeRuntime(
1043         GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1044     if (instruction_ != nullptr) {
1045       // Only restores full width XMM for SIMD.
1046       RestoreLiveRegisters(codegen, instruction_->GetLocations());
1047     }
1048     __ jmp(GetExitLabel());
1049   }
1050 
1051   const char* GetDescription() const override {
1052     return "CompileOptimizedSlowPath";
1053   }
1054 
1055  private:
1056   uint64_t counter_address_;
1057 
1058   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1059 };
1060 
1061 #undef __
1062 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1063 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
1064 
1065 inline Condition X86_64IntegerCondition(IfCondition cond) {
1066   switch (cond) {
1067     case kCondEQ: return kEqual;
1068     case kCondNE: return kNotEqual;
1069     case kCondLT: return kLess;
1070     case kCondLE: return kLessEqual;
1071     case kCondGT: return kGreater;
1072     case kCondGE: return kGreaterEqual;
1073     case kCondB:  return kBelow;
1074     case kCondBE: return kBelowEqual;
1075     case kCondA:  return kAbove;
1076     case kCondAE: return kAboveEqual;
1077   }
1078   LOG(FATAL) << "Unreachable";
1079   UNREACHABLE();
1080 }
1081 
1082 // Maps FP condition to x86_64 name.
1083 inline Condition X86_64FPCondition(IfCondition cond) {
1084   switch (cond) {
1085     case kCondEQ: return kEqual;
1086     case kCondNE: return kNotEqual;
1087     case kCondLT: return kBelow;
1088     case kCondLE: return kBelowEqual;
1089     case kCondGT: return kAbove;
1090     case kCondGE: return kAboveEqual;
1091     default:      break;  // should not happen
1092   }
1093   LOG(FATAL) << "Unreachable";
1094   UNREACHABLE();
1095 }
1096 
1097 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1098   // We have to ensure that the native code we call directly (such as @CriticalNative
1099   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1100   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
1101   // that they are saved in the prologue and properly restored.
1102   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1103     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1104   }
1105 }
1106 
1107 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1108     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1109     [[maybe_unused]] ArtMethod* method) {
1110   return desired_dispatch_info;
1111 }
1112 
1113 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1114   switch (load_kind) {
1115     case MethodLoadKind::kBootImageLinkTimePcRelative:
1116       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1117       __ leal(temp.AsRegister<CpuRegister>(),
1118               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1119       RecordBootImageMethodPatch(invoke);
1120       break;
1121     case MethodLoadKind::kBootImageRelRo: {
1122       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1123       __ movl(temp.AsRegister<CpuRegister>(),
1124               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1125       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1126       break;
1127     }
1128     case MethodLoadKind::kAppImageRelRo: {
1129       DCHECK(GetCompilerOptions().IsAppImage());
1130       __ movl(temp.AsRegister<CpuRegister>(),
1131               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1132       RecordAppImageMethodPatch(invoke);
1133       break;
1134     }
1135     case MethodLoadKind::kBssEntry: {
1136       __ movq(temp.AsRegister<CpuRegister>(),
1137               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1138       RecordMethodBssEntryPatch(invoke);
1139       // No need for memory fence, thanks to the x86-64 memory model.
1140       break;
1141     }
1142     case MethodLoadKind::kJitDirectAddress: {
1143       Load64BitValue(temp.AsRegister<CpuRegister>(),
1144                      reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1145       break;
1146     }
1147     case MethodLoadKind::kRuntimeCall: {
1148       // Test situation, don't do anything.
1149       break;
1150     }
1151     default: {
1152       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1153       UNREACHABLE();
1154     }
1155   }
1156 }
1157 
1158 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1159     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1160   // All registers are assumed to be correctly set up.
1161 
1162   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1163   switch (invoke->GetMethodLoadKind()) {
1164     case MethodLoadKind::kStringInit: {
1165       // temp = thread->string_init_entrypoint
1166       uint32_t offset =
1167           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1168       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1169       break;
1170     }
1171     case MethodLoadKind::kRecursive: {
1172       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1173       break;
1174     }
1175     case MethodLoadKind::kRuntimeCall: {
1176       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1177       return;  // No code pointer retrieval; the runtime performs the call directly.
1178     }
1179     case MethodLoadKind::kBootImageLinkTimePcRelative:
1180       // For kCallCriticalNative we skip loading the method and do the call directly.
1181       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1182         break;
1183       }
1184       FALLTHROUGH_INTENDED;
1185     default: {
1186       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1187       break;
1188     }
1189   }
1190 
1191   switch (invoke->GetCodePtrLocation()) {
1192     case CodePtrLocation::kCallSelf:
1193       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1194       __ call(&frame_entry_label_);
1195       RecordPcInfo(invoke, slow_path);
1196       break;
1197     case CodePtrLocation::kCallCriticalNative: {
1198       size_t out_frame_size =
1199           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1200                                     kNativeStackAlignment,
1201                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1202       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1203         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1204         __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1205         RecordBootImageJniEntrypointPatch(invoke);
1206       } else {
1207         // (callee_method + offset_of_jni_entry_point)()
1208         __ call(Address(callee_method.AsRegister<CpuRegister>(),
1209                          ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1210       }
1211       RecordPcInfo(invoke, slow_path);
1212       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
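      // (Added note, illustrative.) For example, a @CriticalNative method returning jboolean only
      // guarantees the low 8 bits of RAX, so the movzxb below widens it to the zero-extended
      // 32-bit value the managed calling convention expects.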
1213       switch (invoke->GetType()) {
1214         case DataType::Type::kBool:
1215           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1216           break;
1217         case DataType::Type::kInt8:
1218           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1219           break;
1220         case DataType::Type::kUint16:
1221           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1222           break;
1223         case DataType::Type::kInt16:
1224           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1225           break;
1226         case DataType::Type::kInt32:
1227         case DataType::Type::kInt64:
1228         case DataType::Type::kFloat32:
1229         case DataType::Type::kFloat64:
1230         case DataType::Type::kVoid:
1231           break;
1232         default:
1233           DCHECK(false) << invoke->GetType();
1234           break;
1235       }
1236       if (out_frame_size != 0u) {
1237         DecreaseFrame(out_frame_size);
1238       }
1239       break;
1240     }
1241     case CodePtrLocation::kCallArtMethod:
1242       // (callee_method + offset_of_quick_compiled_code)()
1243       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1244                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1245                           kX86_64PointerSize).SizeValue()));
1246       RecordPcInfo(invoke, slow_path);
1247       break;
1248   }
1249 
1250   DCHECK(!IsLeafMethod());
1251 }
1252 
1253 void CodeGeneratorX86_64::GenerateVirtualCall(
1254     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1255   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1256   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1257       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1258 
1259   // Use the calling convention instead of the location of the receiver, as
1260   // intrinsics may have put the receiver in a different register. In the intrinsics
1261   // slow path, the arguments have been moved to the right place, so here we are
1262   // guaranteed that the receiver is the first register of the calling convention.
1263   InvokeDexCallingConvention calling_convention;
1264   Register receiver = calling_convention.GetRegisterAt(0);
1265 
1266   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1267   // /* HeapReference<Class> */ temp = receiver->klass_
1268   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1269   MaybeRecordImplicitNullCheck(invoke);
1270   // Instead of simply (possibly) unpoisoning `temp` here, we should
1271   // emit a read barrier for the previous class reference load.
1272   // However this is not required in practice, as this is an
1273   // intermediate/temporary reference and because the current
1274   // concurrent copying collector keeps the from-space memory
1275   // intact/accessible until the end of the marking phase (though the
1276   // concurrent copying collector may not do so in the future).
1277   __ MaybeUnpoisonHeapReference(temp);
1278 
1279   MaybeGenerateInlineCacheCheck(invoke, temp);
1280 
1281   // temp = temp->GetMethodAt(method_offset);
1282   __ movq(temp, Address(temp, method_offset));
1283   // call temp->GetEntryPoint();
1284   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1285       kX86_64PointerSize).SizeValue()));
1286   RecordPcInfo(invoke, slow_path);
1287 }
1288 
1289 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1290   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1291   __ Bind(&boot_image_other_patches_.back().label);
1292 }
1293 
1294 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1295   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1296   __ Bind(&boot_image_other_patches_.back().label);
1297 }
1298 
1299 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1300   boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1301                                           invoke->GetResolvedMethodReference().index);
1302   __ Bind(&boot_image_method_patches_.back().label);
1303 }
1304 
1305 void CodeGeneratorX86_64::RecordAppImageMethodPatch(HInvoke* invoke) {
1306   app_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1307                                          invoke->GetResolvedMethodReference().index);
1308   __ Bind(&app_image_method_patches_.back().label);
1309 }
1310 
1311 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1312   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1313          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1314          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1315                          invoke->GetMethodReference().dex_file));
1316   method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1317                                          invoke->GetMethodReference().index);
1318   __ Bind(&method_bss_entry_patches_.back().label);
1319 }
1320 
1321 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1322                                                    dex::TypeIndex type_index) {
1323   boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1324   __ Bind(&boot_image_type_patches_.back().label);
1325 }
1326 
1327 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1328                                                   dex::TypeIndex type_index) {
1329   app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1330   __ Bind(&app_image_type_patches_.back().label);
1331 }
1332 
1333 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1334   ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1335   switch (load_class->GetLoadKind()) {
1336     case HLoadClass::LoadKind::kBssEntry:
1337       patches = &type_bss_entry_patches_;
1338       break;
1339     case HLoadClass::LoadKind::kBssEntryPublic:
1340       patches = &public_type_bss_entry_patches_;
1341       break;
1342     case HLoadClass::LoadKind::kBssEntryPackage:
1343       patches = &package_type_bss_entry_patches_;
1344       break;
1345     default:
1346       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1347       UNREACHABLE();
1348   }
1349   patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1350   return &patches->back().label;
1351 }
1352 
1353 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1354   boot_image_string_patches_.emplace_back(
1355       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1356   __ Bind(&boot_image_string_patches_.back().label);
1357 }
1358 
1359 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1360   string_bss_entry_patches_.emplace_back(
1361       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1362   return &string_bss_entry_patches_.back().label;
1363 }
1364 
1365 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1366   method_type_bss_entry_patches_.emplace_back(
1367       &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1368   return &method_type_bss_entry_patches_.back().label;
1369 }
1370 
1371 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1372   boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1373                                                   invoke->GetResolvedMethodReference().index);
1374   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1375 }
1376 
1377 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1378   if (GetCompilerOptions().IsBootImage()) {
1379     __ leal(reg,
1380             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1381     RecordBootImageIntrinsicPatch(boot_image_reference);
1382   } else if (GetCompilerOptions().GetCompilePic()) {
1383     __ movl(reg,
1384             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1385     RecordBootImageRelRoPatch(boot_image_reference);
1386   } else {
1387     DCHECK(GetCompilerOptions().IsJitCompiler());
1388     gc::Heap* heap = Runtime::Current()->GetHeap();
1389     DCHECK(!heap->GetBootImageSpaces().empty());
1390     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1391     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1392   }
1393 }
1394 
1395 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1396   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1397   if (GetCompilerOptions().IsBootImage()) {
1398     // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1399     __ leal(reg,
1400             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1401     MethodReference target_method = invoke->GetResolvedMethodReference();
1402     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1403     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1404     __ Bind(&boot_image_type_patches_.back().label);
1405   } else {
1406     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1407     LoadBootImageAddress(reg, boot_image_offset);
1408   }
1409 }
1410 
1411 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1412   if (GetCompilerOptions().IsBootImage()) {
1413     ScopedObjectAccess soa(Thread::Current());
1414     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1415     boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1416     __ Bind(&boot_image_type_patches_.back().label);
1417   } else {
1418     uint32_t boot_image_offset = GetBootImageOffset(class_root);
1419     LoadBootImageAddress(reg, boot_image_offset);
1420   }
1421 }
1422 
1423 // The label points to the end of the "movl" or another patched instruction, but the literal
1424 // offset for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1425 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
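// Added illustration (not part of the original source): for a patched instruction such as
//     movl reg, [rip + disp32]
// the bound label sits right after the instruction, while the linker patch must target the
// disp32 itself, hence literal_offset = label.Position() - 4 in EmitPcRelativeLinkerPatches().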
1426 
1427 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1428 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1429     const ArenaDeque<PatchInfo<Label>>& infos,
1430     ArenaVector<linker::LinkerPatch>* linker_patches) {
1431   for (const PatchInfo<Label>& info : infos) {
1432     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1433     linker_patches->push_back(
1434         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1435   }
1436 }
1437 
1438 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1439 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1440                                      const DexFile* target_dex_file,
1441                                      uint32_t pc_insn_offset,
1442                                      uint32_t boot_image_offset) {
1443   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1444   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1445 }
1446 
1447 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1448   DCHECK(linker_patches->empty());
1449   size_t size =
1450       boot_image_method_patches_.size() +
1451       app_image_method_patches_.size() +
1452       method_bss_entry_patches_.size() +
1453       boot_image_type_patches_.size() +
1454       app_image_type_patches_.size() +
1455       type_bss_entry_patches_.size() +
1456       public_type_bss_entry_patches_.size() +
1457       package_type_bss_entry_patches_.size() +
1458       boot_image_string_patches_.size() +
1459       string_bss_entry_patches_.size() +
1460       method_type_bss_entry_patches_.size() +
1461       boot_image_jni_entrypoint_patches_.size() +
1462       boot_image_other_patches_.size();
1463   linker_patches->reserve(size);
1464   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1465     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1466         boot_image_method_patches_, linker_patches);
1467     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1468         boot_image_type_patches_, linker_patches);
1469     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1470         boot_image_string_patches_, linker_patches);
1471   } else {
1472     DCHECK(boot_image_method_patches_.empty());
1473     DCHECK(boot_image_type_patches_.empty());
1474     DCHECK(boot_image_string_patches_.empty());
1475   }
1476   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
1477   DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1478   if (GetCompilerOptions().IsBootImage()) {
1479     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1480         boot_image_other_patches_, linker_patches);
1481   } else {
1482     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1483         boot_image_other_patches_, linker_patches);
1484     EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
1485         app_image_method_patches_, linker_patches);
1486     EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1487         app_image_type_patches_, linker_patches);
1488   }
1489   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1490       method_bss_entry_patches_, linker_patches);
1491   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1492       type_bss_entry_patches_, linker_patches);
1493   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1494       public_type_bss_entry_patches_, linker_patches);
1495   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1496       package_type_bss_entry_patches_, linker_patches);
1497   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1498       string_bss_entry_patches_, linker_patches);
1499   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1500       method_type_bss_entry_patches_, linker_patches);
1501   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1502       boot_image_jni_entrypoint_patches_, linker_patches);
1503   DCHECK_EQ(size, linker_patches->size());
1504 }
1505 
1506 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1507   stream << Register(reg);
1508 }
1509 
1510 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1511   stream << FloatRegister(reg);
1512 }
1513 
1514 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1515   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1516 }
1517 
1518 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1519   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1520   return kX86_64WordSize;
1521 }
1522 
1523 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1524   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1525   return kX86_64WordSize;
1526 }
1527 
1528 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1529   if (GetGraph()->HasSIMD()) {
1530     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1531   } else {
1532     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1533   }
1534   return GetSlowPathFPWidth();
1535 }
1536 
1537 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1538   if (GetGraph()->HasSIMD()) {
1539     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1540   } else {
1541     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542   }
1543   return GetSlowPathFPWidth();
1544 }
1545 
1546 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1547                                         HInstruction* instruction,
1548                                         SlowPathCode* slow_path) {
1549   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1550   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1551   if (EntrypointRequiresStackMap(entrypoint)) {
1552     RecordPcInfo(instruction, slow_path);
1553   }
1554 }
1555 
1556 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1557                                                               HInstruction* instruction,
1558                                                               SlowPathCode* slow_path) {
1559   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1560   GenerateInvokeRuntime(entry_point_offset);
1561 }
1562 
1563 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1564   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1565 }
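// Added note (illustrative, not in the original source): ART addresses the current Thread via the
// GS segment on x86-64, so the call above is effectively `call qword ptr gs:[entry_point_offset]`,
// i.e. an indirect call through the entrypoint table embedded in the Thread object.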
1566 
1567 namespace detail {
1568 
1569 // Mark which intrinsics we don't have handcrafted code for.
1570 template <Intrinsics T>
1571 struct IsUnimplemented {
1572   bool is_unimplemented = false;
1573 };
1574 
1575 #define TRUE_OVERRIDE(Name)                     \
1576   template <>                                   \
1577   struct IsUnimplemented<Intrinsics::k##Name> { \
1578     bool is_unimplemented = true;               \
1579   };
1580 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1581 #undef TRUE_OVERRIDE
1582 
1583 static constexpr bool kIsIntrinsicUnimplemented[] = {
1584     false,  // kNone
1585 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1586     IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1587     ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1588 #undef IS_UNIMPLEMENTED
1589 };
1590 
1591 }  // namespace detail
1592 
1593 static constexpr int kNumberOfCpuRegisterPairs = 0;
1594 // Use a fake return address register to mimic Quick.
1595 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1596 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1597                                          const CompilerOptions& compiler_options,
1598                                          OptimizingCompilerStats* stats)
1599     : CodeGenerator(graph,
1600                     kNumberOfCpuRegisters,
1601                     kNumberOfFloatRegisters,
1602                     kNumberOfCpuRegisterPairs,
1603                     ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1604                         | (1 << kFakeReturnRegister),
1605                     ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1606                     compiler_options,
1607                     stats,
1608                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1609       block_labels_(nullptr),
1610       location_builder_(graph, this),
1611       instruction_visitor_(graph, this),
1612       move_resolver_(graph->GetAllocator(), this),
1613       assembler_(graph->GetAllocator(),
1614                  compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1615       constant_area_start_(0),
1616       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1617       app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1618       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1619       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1620       app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1621       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1622       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1623       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1624       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1625       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1626       method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1627       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1628       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1629       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1630       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1631       jit_method_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1632       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1633   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1634 }
1635 
1636 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1637                                                                CodeGeneratorX86_64* codegen)
1638       : InstructionCodeGenerator(graph, codegen),
1639         assembler_(codegen->GetAssembler()),
1640         codegen_(codegen) {}
1641 
1642 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1643   // Stack register is always reserved.
1644   blocked_core_registers_[RSP] = true;
1645 
1646   // Block the register used as TMP.
1647   blocked_core_registers_[TMP] = true;
1648 }
1649 
1650 static dwarf::Reg DWARFReg(Register reg) {
1651   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1652 }
1653 
1654 static dwarf::Reg DWARFReg(FloatRegister reg) {
1655   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1656 }
1657 
1658 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1659   LocationSummary* locations = new (GetGraph()->GetAllocator())
1660       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1661   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1662   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1663   locations->AddTemp(Location::RegisterLocation(RAX));
1664   locations->AddTemp(Location::RegisterLocation(RDX));
1665 }
1666 
1667 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1668   SlowPathCode* slow_path =
1669       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1670   LocationSummary* locations = instruction->GetLocations();
1671   codegen_->AddSlowPath(slow_path);
1672 
1673   if (instruction->IsMethodExitHook()) {
1674     // Check whether we need to check if the caller requires a deoptimization. Strictly
1675     // speaking, checking the CheckCallerForDeopt bit would suffice, but it is faster to test
1676     // for any non-zero value. The kCHA bit isn't used in debuggable runtimes, since CHA
1677     // optimization is disabled there, and the other bit is set when this method itself
1678     // requires a deoptimization due to redefinition, so testing for non-zero is safe here.
1679     __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1680             Immediate(0));
1681     __ j(kNotEqual, slow_path->GetEntryLabel());
1682   }
1683 
1684   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1685   MemberOffset offset = instruction->IsMethodExitHook() ?
1686       instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1687       : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1688   __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1689   __ cmpb(Address(CpuRegister(TMP), 0),
1690           Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1691   // Check if there are any method entry / exit listeners. If not, continue with execution.
1692   __ j(kLess, slow_path->GetExitLabel());
1693   // Check if there are any slow method entry / exit listeners. If so, take the slow path.
1694   __ j(kGreater, slow_path->GetEntryLabel());
1695 
1696   // Check if there is space in the buffer for a new entry; if not, take the slow path.
1697   CpuRegister init_entry = locations->GetTemp(0).AsRegister<CpuRegister>();
1698   // Use a register that is different from RAX and RDX: rdtsc returns its result in RAX and RDX,
1699   // and we use curr_entry to store that result into the buffer.
1700   CpuRegister curr_entry = CpuRegister(TMP);
1701   DCHECK(curr_entry.AsRegister() != RAX);
1702   DCHECK(curr_entry.AsRegister() != RDX);
1703   uint64_t trace_buffer_curr_entry_offset =
1704       Thread::TraceBufferCurrPtrOffset<kX86_64PointerSize>().SizeValue();
1705   __ gs()->movq(CpuRegister(curr_entry),
1706                 Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true));
1707   __ subq(CpuRegister(curr_entry), Immediate(kNumEntriesForWallClock * sizeof(void*)));
1708   __ gs()->movq(init_entry,
1709                 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1710                                   /* no_rip= */ true));
1711   __ cmpq(curr_entry, init_entry);
1712   __ j(kLess, slow_path->GetEntryLabel());
1713 
1714   // Update the index in the `Thread`.
1715   __ gs()->movq(Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true),
1716                 CpuRegister(curr_entry));
1717 
1718   // Record method pointer and action.
1719   CpuRegister method = init_entry;
1720   __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1721   // Use the last two bits to encode the trace method action. For MethodEntry the action is 0,
1722   // so there is no need to set the bits; they are already 0.
1723   if (instruction->IsMethodExitHook()) {
1724     DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1725     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1726     static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1727     __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1728   }
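  // (Added illustration, not in the original source.) Because ArtMethod objects are at least
  // 4-byte aligned, the two low bits of the pointer are free; e.g. an exit event for a method at
  // 0x7f00a010 would be recorded as 0x7f00a011.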
1729   __ movq(Address(curr_entry, kMethodOffsetInBytes), CpuRegister(method));
1730   // Get the timestamp. rdtsc returns the timestamp in RAX + RDX even on 64-bit architectures.
1731   __ rdtsc();
1732   __ shlq(CpuRegister(RDX), Immediate(32));
1733   __ orq(CpuRegister(RAX), CpuRegister(RDX));
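  // (Added note.) RAX now holds the full 64-bit TSC value: (high half from RDX << 32) | low half.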
1734   __ movq(Address(curr_entry, kTimestampOffsetInBytes), CpuRegister(RAX));
1735   __ Bind(slow_path->GetExitLabel());
1736 }
1737 
1738 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1739   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1740   DCHECK(codegen_->RequiresCurrentMethod());
1741   GenerateMethodEntryExitHook(instruction);
1742 }
1743 
1744 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1745   switch (instr->InputAt(0)->GetType()) {
1746     case DataType::Type::kReference:
1747     case DataType::Type::kBool:
1748     case DataType::Type::kUint8:
1749     case DataType::Type::kInt8:
1750     case DataType::Type::kUint16:
1751     case DataType::Type::kInt16:
1752     case DataType::Type::kInt32:
1753     case DataType::Type::kInt64:
1754       locations->SetInAt(0, Location::RegisterLocation(RAX));
1755       break;
1756 
1757     case DataType::Type::kFloat32:
1758     case DataType::Type::kFloat64:
1759       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1760       break;
1761 
1762     case DataType::Type::kVoid:
1763       locations->SetInAt(0, Location::NoLocation());
1764       break;
1765 
1766     default:
1767       LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1768   }
1769 }
1770 
1771 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1772   LocationSummary* locations = new (GetGraph()->GetAllocator())
1773       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1774   SetInForReturnValue(method_hook, locations);
1775   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1776   // two 32-bit values in EAX + EDX even on 64-bit architectures.
1777   locations->AddTemp(Location::RegisterLocation(RAX));
1778   locations->AddTemp(Location::RegisterLocation(RDX));
1779 }
1780 
1781 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1782   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1783   DCHECK(codegen_->RequiresCurrentMethod());
1784   GenerateMethodEntryExitHook(instruction);
1785 }
1786 
1787 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1788   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1789     NearLabel overflow;
1790     Register method = kMethodRegisterArgument;
1791     if (!is_frame_entry) {
1792       CHECK(RequiresCurrentMethod());
1793       method = TMP;
1794       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1795     }
1796     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1797             Immediate(interpreter::kNterpHotnessValue));
1798     __ j(kEqual, &overflow);
1799     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1800             Immediate(-1));
1801     __ Bind(&overflow);
1802   }
1803 
1804   if (GetGraph()->IsCompilingBaseline() &&
1805       GetGraph()->IsUsefulOptimizing() &&
1806       !Runtime::Current()->IsAotCompiler()) {
1807     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1808     DCHECK(info != nullptr);
1809     CHECK(!HasEmptyFrame());
1810     uint64_t address = reinterpret_cast64<uint64_t>(info) +
1811         ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1812     SlowPathCode* slow_path =
1813         new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1814     AddSlowPath(slow_path);
1815     // Note: if the address were in the 32-bit range, we could use
1816     // Address::Absolute and avoid this movq.
1817     __ movq(CpuRegister(TMP), Immediate(address));
1818     // With multiple threads, this counter can overflow. That is OK: we will eventually get
1819     // to see it reach 0. Also, at this point we have no register available to look
1820     // at the counter directly.
1821     __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1822     __ j(kEqual, slow_path->GetEntryLabel());
1823     __ Bind(slow_path->GetExitLabel());
1824   }
1825 }
1826 
1827 void CodeGeneratorX86_64::GenerateFrameEntry() {
1828   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1829 
1830   // Check if we need to generate the clinit check. We will jump to the
1831   // resolution stub if the class is not initialized and the executing thread is
1832   // not the thread initializing it.
1833   // We do this before constructing the frame to get the correct stack trace if
1834   // an exception is thrown.
1835   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1836     NearLabel resolution;
1837     // Check if we're visibly initialized.
1838 
1839     // We don't emit a read barrier here to save on code size. We rely on the
1840     // resolution trampoline to do a suspend check before re-entering this code.
1841     __ movl(CpuRegister(TMP),
1842             Address(CpuRegister(kMethodRegisterArgument),
1843                     ArtMethod::DeclaringClassOffset().Int32Value()));
1844     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1845             Immediate(kShiftedVisiblyInitializedValue));
1846     __ j(kAboveEqual, &frame_entry_label_);
1847 
1848     // Check if we're initializing and the thread initializing is the one
1849     // executing the code.
1850     __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1851             Immediate(kShiftedInitializingValue));
1852     __ j(kBelow, &resolution);
1853 
1854     __ movl(CpuRegister(TMP),
1855             Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1856     __ gs()->cmpl(
1857         CpuRegister(TMP),
1858         Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1859     __ j(kEqual, &frame_entry_label_);
1860     __ Bind(&resolution);
1861 
1862     // Jump to the resolution stub.
1863     ThreadOffset64 entrypoint_offset =
1864         GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1865     __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1866   }
1867 
1868   __ Bind(&frame_entry_label_);
1869   bool skip_overflow_check = IsLeafMethod()
1870       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1871   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1872 
1873 
1874   if (!skip_overflow_check) {
1875     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1876     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1877     RecordPcInfoForFrameOrBlockEntry();
1878   }
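  // (Added note, illustrative.) The testq above is the implicit stack-overflow check: it touches
  // an address `reserved_bytes` below RSP, so an overflowing stack faults here and the fault
  // handler raises the StackOverflowError before the frame is set up.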
1879 
1880   if (!HasEmptyFrame()) {
1881     // Make sure the frame size isn't unreasonably large.
1882     DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1883 
1884     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1885       Register reg = kCoreCalleeSaves[i];
1886       if (allocated_registers_.ContainsCoreRegister(reg)) {
1887         __ pushq(CpuRegister(reg));
1888         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1889         __ cfi().RelOffset(DWARFReg(reg), 0);
1890       }
1891     }
1892 
1893     int adjust = GetFrameSize() - GetCoreSpillSize();
1894     IncreaseFrame(adjust);
1895     uint32_t xmm_spill_location = GetFpuSpillStart();
1896     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1897 
1898     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1899       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1900         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1901         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1902         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1903       }
1904     }
1905 
1906     // Save the current method if we need it. Note that we do not
1907     // do this in HCurrentMethod, as the instruction might have been removed
1908     // in the SSA graph.
1909     if (RequiresCurrentMethod()) {
1910       CHECK(!HasEmptyFrame());
1911       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1912               CpuRegister(kMethodRegisterArgument));
1913     }
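    // (Added illustration, not in the original source.) At this point the frame holds, from top
    // to bottom: the caller's return address, the pushed core callee-saves, the spilled XMM
    // callee-saves, and finally the current ArtMethod* at [RSP + 0] (kCurrentMethodStackOffset).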
1914 
1915     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1916       CHECK(!HasEmptyFrame());
1917       // Initialize should_deoptimize flag to 0.
1918       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1919     }
1920   }
1921 
1922   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1923 }
1924 
1925 void CodeGeneratorX86_64::GenerateFrameExit() {
1926   __ cfi().RememberState();
1927   if (!HasEmptyFrame()) {
1928     uint32_t xmm_spill_location = GetFpuSpillStart();
1929     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1930     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1931       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1932         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1933         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1934         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1935       }
1936     }
1937 
1938     int adjust = GetFrameSize() - GetCoreSpillSize();
1939     DecreaseFrame(adjust);
1940 
1941     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1942       Register reg = kCoreCalleeSaves[i];
1943       if (allocated_registers_.ContainsCoreRegister(reg)) {
1944         __ popq(CpuRegister(reg));
1945         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1946         __ cfi().Restore(DWARFReg(reg));
1947       }
1948     }
1949   }
1950   __ ret();
1951   __ cfi().RestoreState();
1952   __ cfi().DefCFAOffset(GetFrameSize());
1953 }
1954 
1955 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1956   __ Bind(GetLabelOf(block));
1957 }
1958 
1959 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1960   if (source.Equals(destination)) {
1961     return;
1962   }
1963   if (destination.IsRegister()) {
1964     CpuRegister dest = destination.AsRegister<CpuRegister>();
1965     if (source.IsRegister()) {
1966       __ movq(dest, source.AsRegister<CpuRegister>());
1967     } else if (source.IsFpuRegister()) {
1968       __ movq(dest, source.AsFpuRegister<XmmRegister>());
1969     } else if (source.IsStackSlot()) {
1970       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1971     } else if (source.IsConstant()) {
1972       HConstant* constant = source.GetConstant();
1973       if (constant->IsLongConstant()) {
1974         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1975       } else if (constant->IsDoubleConstant()) {
1976         Load64BitValue(dest, GetInt64ValueOf(constant));
1977       } else {
1978         Load32BitValue(dest, GetInt32ValueOf(constant));
1979       }
1980     } else {
1981       DCHECK(source.IsDoubleStackSlot());
1982       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1983     }
1984   } else if (destination.IsFpuRegister()) {
1985     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1986     if (source.IsRegister()) {
1987       __ movq(dest, source.AsRegister<CpuRegister>());
1988     } else if (source.IsFpuRegister()) {
1989       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1990     } else if (source.IsConstant()) {
1991       HConstant* constant = source.GetConstant();
1992       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1993       if (constant->IsFloatConstant()) {
1994         Load32BitValue(dest, static_cast<int32_t>(value));
1995       } else {
1996         Load64BitValue(dest, value);
1997       }
1998     } else if (source.IsStackSlot()) {
1999       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2000     } else {
2001       DCHECK(source.IsDoubleStackSlot());
2002       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2003     }
2004   } else if (destination.IsStackSlot()) {
2005     if (source.IsRegister()) {
2006       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2007               source.AsRegister<CpuRegister>());
2008     } else if (source.IsFpuRegister()) {
2009       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2010                source.AsFpuRegister<XmmRegister>());
2011     } else if (source.IsConstant()) {
2012       HConstant* constant = source.GetConstant();
2013       int32_t value = GetInt32ValueOf(constant);
2014       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2015     } else {
2016       DCHECK(source.IsStackSlot()) << source;
2017       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2018       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2019     }
2020   } else {
2021     DCHECK(destination.IsDoubleStackSlot());
2022     if (source.IsRegister()) {
2023       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2024               source.AsRegister<CpuRegister>());
2025     } else if (source.IsFpuRegister()) {
2026       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2027                source.AsFpuRegister<XmmRegister>());
2028     } else if (source.IsConstant()) {
2029       HConstant* constant = source.GetConstant();
2030       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2031       int64_t value = GetInt64ValueOf(constant);
2032       Store64BitValueToStack(destination, value);
2033     } else {
2034       DCHECK(source.IsDoubleStackSlot());
2035       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2036       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2037     }
2038   }
2039 }
2040 
2041 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2042                                                     Location dst,
2043                                                     Address src) {
2044   switch (type) {
2045     case DataType::Type::kBool:
2046     case DataType::Type::kUint8:
2047       __ movzxb(dst.AsRegister<CpuRegister>(), src);
2048       break;
2049     case DataType::Type::kInt8:
2050       __ movsxb(dst.AsRegister<CpuRegister>(), src);
2051       break;
2052     case DataType::Type::kUint16:
2053       __ movzxw(dst.AsRegister<CpuRegister>(), src);
2054       break;
2055     case DataType::Type::kInt16:
2056       __ movsxw(dst.AsRegister<CpuRegister>(), src);
2057       break;
2058     case DataType::Type::kInt32:
2059     case DataType::Type::kUint32:
2060       __ movl(dst.AsRegister<CpuRegister>(), src);
2061       break;
2062     case DataType::Type::kInt64:
2063     case DataType::Type::kUint64:
2064       __ movq(dst.AsRegister<CpuRegister>(), src);
2065       break;
2066     case DataType::Type::kFloat32:
2067       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2068       break;
2069     case DataType::Type::kFloat64:
2070       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2071       break;
2072     case DataType::Type::kVoid:
2073     case DataType::Type::kReference:
2074       LOG(FATAL) << "Unreachable type " << type;
2075       UNREACHABLE();
2076   }
2077 }
2078 
2079 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2080   DCHECK(location.IsRegister());
2081   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2082 }
2083 
2084 void CodeGeneratorX86_64::MoveLocation(Location dst,
2085                                        Location src,
2086                                        [[maybe_unused]] DataType::Type dst_type) {
2087   Move(dst, src);
2088 }
2089 
2090 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2091   if (location.IsRegister()) {
2092     locations->AddTemp(location);
2093   } else {
2094     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2095   }
2096 }
2097 
2098 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2099   if (successor->IsExitBlock()) {
2100     DCHECK(got->GetPrevious()->AlwaysThrows());
2101     return;  // no code needed
2102   }
2103 
2104   HBasicBlock* block = got->GetBlock();
2105   HInstruction* previous = got->GetPrevious();
2106 
2107   HLoopInformation* info = block->GetLoopInformation();
2108   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2109     codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2110     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2111     return;
2112   }
2113 
2114   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2115     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2116   }
2117   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2118     __ jmp(codegen_->GetLabelOf(successor));
2119   }
2120 }
2121 
2122 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2123   got->SetLocations(nullptr);
2124 }
2125 
2126 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2127   HandleGoto(got, got->GetSuccessor());
2128 }
2129 
2130 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2131   try_boundary->SetLocations(nullptr);
2132 }
2133 
2134 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2135   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2136   if (!successor->IsExitBlock()) {
2137     HandleGoto(try_boundary, successor);
2138   }
2139 }
2140 
2141 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2142   exit->SetLocations(nullptr);
2143 }
2144 
2145 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2146 
2147 template<class LabelType>
2148 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2149                                                      LabelType* true_label,
2150                                                      LabelType* false_label) {
2151   if (cond->IsFPConditionTrueIfNaN()) {
2152     __ j(kUnordered, true_label);
2153   } else if (cond->IsFPConditionFalseIfNaN()) {
2154     __ j(kUnordered, false_label);
2155   }
2156   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2157 }
2158 
2159 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2160   LocationSummary* locations = condition->GetLocations();
2161 
2162   Location left = locations->InAt(0);
2163   Location right = locations->InAt(1);
2164   DataType::Type type = condition->InputAt(0)->GetType();
2165   switch (type) {
2166     case DataType::Type::kBool:
2167     case DataType::Type::kUint8:
2168     case DataType::Type::kInt8:
2169     case DataType::Type::kUint16:
2170     case DataType::Type::kInt16:
2171     case DataType::Type::kInt32:
2172     case DataType::Type::kReference: {
2173       codegen_->GenerateIntCompare(left, right);
2174       break;
2175     }
2176     case DataType::Type::kInt64: {
2177       codegen_->GenerateLongCompare(left, right);
2178       break;
2179     }
2180     case DataType::Type::kFloat32: {
2181       if (right.IsFpuRegister()) {
2182         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2183       } else if (right.IsConstant()) {
2184         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2185                    codegen_->LiteralFloatAddress(
2186                        right.GetConstant()->AsFloatConstant()->GetValue()));
2187       } else {
2188         DCHECK(right.IsStackSlot());
2189         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2190                    Address(CpuRegister(RSP), right.GetStackIndex()));
2191       }
2192       break;
2193     }
2194     case DataType::Type::kFloat64: {
2195       if (right.IsFpuRegister()) {
2196         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2197       } else if (right.IsConstant()) {
2198         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2199                    codegen_->LiteralDoubleAddress(
2200                        right.GetConstant()->AsDoubleConstant()->GetValue()));
2201       } else {
2202         DCHECK(right.IsDoubleStackSlot());
2203         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2204                    Address(CpuRegister(RSP), right.GetStackIndex()));
2205       }
2206       break;
2207     }
2208     default:
2209       LOG(FATAL) << "Unexpected condition type " << type;
2210   }
2211 }
2212 
2213 template<class LabelType>
2214 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2215                                                                   LabelType* true_target_in,
2216                                                                   LabelType* false_target_in) {
2217   // Generated branching requires both targets to be explicit. If either of the
2218   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2219   LabelType fallthrough_target;
2220   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2221   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2222 
2223   // Generate the comparison to set the CC.
2224   GenerateCompareTest(condition);
2225 
2226   // Now generate the correct jump(s).
2227   DataType::Type type = condition->InputAt(0)->GetType();
2228   switch (type) {
2229     case DataType::Type::kInt64: {
2230       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2231       break;
2232     }
2233     case DataType::Type::kFloat32: {
2234       GenerateFPJumps(condition, true_target, false_target);
2235       break;
2236     }
2237     case DataType::Type::kFloat64: {
2238       GenerateFPJumps(condition, true_target, false_target);
2239       break;
2240     }
2241     default:
2242       LOG(FATAL) << "Unexpected condition type " << type;
2243   }
2244 
2245   if (false_target != &fallthrough_target) {
2246     __ jmp(false_target);
2247   }
2248 
2249   if (fallthrough_target.IsLinked()) {
2250     __ Bind(&fallthrough_target);
2251   }
2252 }
2253 
2254 static bool AreEflagsSetFrom(HInstruction* cond,
2255                              HInstruction* branch,
2256                              const CompilerOptions& compiler_options) {
2257   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2258   // are set only strictly before `branch`. We can't use the eflags on long
2259   // conditions if they are materialized due to the complex branching.
2260   return cond->IsCondition() &&
2261          cond->GetNext() == branch &&
2262          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2263          !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2264            compiler_options.ProfileBranches());
2265 }
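// Added note (illustrative): this lets e.g. an HCondition immediately followed by its HIf reuse
// the flags produced by the compare; if any instruction (such as a register move, which may use
// xorl) is scheduled in between, the flags are considered clobbered and the materialized condition
// is instead compared against zero in GenerateTestAndBranch() below.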
2266 
2267 template<class LabelType>
2268 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2269                                                            size_t condition_input_index,
2270                                                            LabelType* true_target,
2271                                                            LabelType* false_target) {
2272   HInstruction* cond = instruction->InputAt(condition_input_index);
2273 
2274   if (true_target == nullptr && false_target == nullptr) {
2275     // Nothing to do. The code always falls through.
2276     return;
2277   } else if (cond->IsIntConstant()) {
2278     // Constant condition, statically compared against "true" (integer value 1).
2279     if (cond->AsIntConstant()->IsTrue()) {
2280       if (true_target != nullptr) {
2281         __ jmp(true_target);
2282       }
2283     } else {
2284       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2285       if (false_target != nullptr) {
2286         __ jmp(false_target);
2287       }
2288     }
2289     return;
2290   }
2291 
2292   // The following code generates these patterns:
2293   //  (1) true_target == nullptr && false_target != nullptr
2294   //        - opposite condition true => branch to false_target
2295   //  (2) true_target != nullptr && false_target == nullptr
2296   //        - condition true => branch to true_target
2297   //  (3) true_target != nullptr && false_target != nullptr
2298   //        - condition true => branch to true_target
2299   //        - branch to false_target
2300   if (IsBooleanValueOrMaterializedCondition(cond)) {
2301     if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2302       if (true_target == nullptr) {
2303         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2304       } else {
2305         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2306       }
2307     } else {
2308       // Materialized condition, compare against 0.
2309       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2310       if (lhs.IsRegister()) {
2311         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2312       } else {
2313         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2314       }
2315       if (true_target == nullptr) {
2316         __ j(kEqual, false_target);
2317       } else {
2318         __ j(kNotEqual, true_target);
2319       }
2320     }
2321   } else {
2322     // Condition has not been materialized, use its inputs as the
2323     // comparison and its condition as the branch condition.
2324     HCondition* condition = cond->AsCondition();
2325 
2326     // If this is a long or FP comparison that has been folded into
2327     // the HCondition, generate the comparison directly.
2328     DataType::Type type = condition->InputAt(0)->GetType();
2329     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2330       GenerateCompareTestAndBranch(condition, true_target, false_target);
2331       return;
2332     }
2333 
2334     Location lhs = condition->GetLocations()->InAt(0);
2335     Location rhs = condition->GetLocations()->InAt(1);
2336     codegen_->GenerateIntCompare(lhs, rhs);
2337     if (true_target == nullptr) {
2338       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2339     } else {
2340       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2341     }
2342   }
2343 
2344   // If neither branch falls through (case 3), the conditional branch to `true_target`
2345   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2346   if (true_target != nullptr && false_target != nullptr) {
2347     __ jmp(false_target);
2348   }
2349 }
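
// Illustrative sketch only (not used by the code generator): the three
// emission patterns documented in GenerateTestAndBranch above, modeled over
// plain booleans. The enum and function names are hypothetical, introduced
// just for this example.
enum class EmittedJumps {
  kNone,                  // Both targets fall through: emit nothing.
  kJumpOppositeToFalse,   // Pattern (1): jcc(!cond) -> false_target.
  kJumpToTrue,            // Pattern (2): jcc(cond) -> true_target.
  kJumpToTrueThenFalse,   // Pattern (3): jcc(cond) -> true_target; jmp -> false_target.
};

[[maybe_unused]] static EmittedJumps SelectEmittedJumps(bool has_true_target,
                                                        bool has_false_target) {
  if (!has_true_target && !has_false_target) {
    return EmittedJumps::kNone;
  } else if (!has_true_target) {
    return EmittedJumps::kJumpOppositeToFalse;
  } else if (!has_false_target) {
    return EmittedJumps::kJumpToTrue;
  } else {
    return EmittedJumps::kJumpToTrueThenFalse;
  }
}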
2350 
2351 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2352   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2353   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2354     if (GetGraph()->IsCompilingBaseline() &&
2355         codegen_->GetCompilerOptions().ProfileBranches() &&
2356         !Runtime::Current()->IsAotCompiler()) {
2357       locations->SetInAt(0, Location::RequiresRegister());
2358       locations->AddTemp(Location::RequiresRegister());
2359     } else {
2360       locations->SetInAt(0, Location::Any());
2361     }
2362   }
2363 }
2364 
2365 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2366   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2367   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2368   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2369       nullptr : codegen_->GetLabelOf(true_successor);
2370   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2371       nullptr : codegen_->GetLabelOf(false_successor);
2372   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2373     if (GetGraph()->IsCompilingBaseline() &&
2374         codegen_->GetCompilerOptions().ProfileBranches() &&
2375         !Runtime::Current()->IsAotCompiler()) {
2376       DCHECK(if_instr->InputAt(0)->IsCondition());
2377       CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2378       ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2379       DCHECK(info != nullptr);
2380       BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2381       // Currently, not all If branches are profiled.
2382       if (cache != nullptr) {
2383         uint64_t address =
2384             reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2385         static_assert(
2386             BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2387             "Unexpected offsets for BranchCache");
2388         NearLabel done;
2389         Location lhs = if_instr->GetLocations()->InAt(0);
2390         __ movq(CpuRegister(TMP), Immediate(address));
2391         __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2392         __ addw(temp, Immediate(1));
2393         __ j(kZero, &done);
2394         __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2395         __ Bind(&done);
2396       }
2397     }
2398   }
2399   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2400 }
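
// Illustrative sketch only: the saturating profile counter updated above with
// `addw` followed by `j(kZero, &done)`. When the 16-bit count wraps around to
// zero, the wrapped value is simply not written back, so the counter sticks
// at 0xFFFF instead of restarting from zero.
[[maybe_unused]] static void SaturatingIncrementU16(uint16_t* counter) {
  uint16_t incremented = static_cast<uint16_t>(*counter + 1u);  // addw temp, 1
  if (incremented != 0u) {                                      // j(kZero, &done)
    *counter = incremented;                                     // movw back into the cache
  }
}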
2401 
2402 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2403   LocationSummary* locations = new (GetGraph()->GetAllocator())
2404       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2405   InvokeRuntimeCallingConvention calling_convention;
2406   RegisterSet caller_saves = RegisterSet::Empty();
2407   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2408   locations->SetCustomSlowPathCallerSaves(caller_saves);
2409   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2410     locations->SetInAt(0, Location::Any());
2411   }
2412 }
2413 
2414 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2415   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2416   GenerateTestAndBranch<Label>(deoptimize,
2417                                /* condition_input_index= */ 0,
2418                                slow_path->GetEntryLabel(),
2419                                /* false_target= */ nullptr);
2420 }
2421 
2422 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2423   LocationSummary* locations = new (GetGraph()->GetAllocator())
2424       LocationSummary(flag, LocationSummary::kNoCall);
2425   locations->SetOut(Location::RequiresRegister());
2426 }
2427 
2428 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2429   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2430           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2431 }
2432 
2433 static bool SelectCanUseCMOV(HSelect* select) {
2434   // There are no conditional move instructions for XMMs.
2435   if (DataType::IsFloatingPointType(select->GetType())) {
2436     return false;
2437   }
2438 
2439   // A FP condition doesn't generate the single CC that we need.
2440   HInstruction* condition = select->GetCondition();
2441   if (condition->IsCondition() &&
2442       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2443     return false;
2444   }
2445 
2446   // We can generate a CMOV for this Select.
2447   return true;
2448 }
2449 
2450 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2451   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2452   if (DataType::IsFloatingPointType(select->GetType())) {
2453     locations->SetInAt(0, Location::RequiresFpuRegister());
2454     locations->SetInAt(1, Location::Any());
2455   } else {
2456     locations->SetInAt(0, Location::RequiresRegister());
2457     if (SelectCanUseCMOV(select)) {
2458       if (select->InputAt(1)->IsConstant()) {
2459         locations->SetInAt(1, Location::RequiresRegister());
2460       } else {
2461         locations->SetInAt(1, Location::Any());
2462       }
2463     } else {
2464       locations->SetInAt(1, Location::Any());
2465     }
2466   }
2467   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2468     locations->SetInAt(2, Location::RequiresRegister());
2469   }
2470   locations->SetOut(Location::SameAsFirstInput());
2471 }
2472 
2473 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2474   LocationSummary* locations = select->GetLocations();
2475   if (SelectCanUseCMOV(select)) {
2476     // If both the condition and the source types are integer, we can generate
2477     // a CMOV to implement Select.
2478     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2479     Location value_true_loc = locations->InAt(1);
2480     DCHECK(locations->InAt(0).Equals(locations->Out()));
2481 
2482     HInstruction* select_condition = select->GetCondition();
2483     Condition cond = kNotEqual;
2484 
2485     // Figure out how to test the 'condition'.
2486     if (select_condition->IsCondition()) {
2487       HCondition* condition = select_condition->AsCondition();
2488       if (!condition->IsEmittedAtUseSite()) {
2489         // This was a previously materialized condition.
2490         // Can we use the existing condition code?
2491         if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2492           // Materialization was the previous instruction.  Condition codes are right.
2493           cond = X86_64IntegerCondition(condition->GetCondition());
2494         } else {
2495           // No, we have to recreate the condition code.
2496           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2497           __ testl(cond_reg, cond_reg);
2498         }
2499       } else {
2500         GenerateCompareTest(condition);
2501         cond = X86_64IntegerCondition(condition->GetCondition());
2502       }
2503     } else {
2504       // Must be a Boolean condition, which needs to be compared to 0.
2505       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2506       __ testl(cond_reg, cond_reg);
2507     }
2508 
2509     // If the condition is true, overwrite the output, which already contains false.
2510     // Generate the correct sized CMOV.
2511     bool is_64_bit = DataType::Is64BitType(select->GetType());
2512     if (value_true_loc.IsRegister()) {
2513       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2514     } else {
2515       __ cmov(cond,
2516               value_false,
2517               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2518     }
2519   } else {
2520     NearLabel false_target;
2521     GenerateTestAndBranch<NearLabel>(select,
2522                                      /* condition_input_index= */ 2,
2523                                      /* true_target= */ nullptr,
2524                                      &false_target);
2525     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2526     __ Bind(&false_target);
2527   }
2528 }
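
// Illustrative sketch only: the cmov-based path above in scalar form. The
// output register starts out holding the "false" value (the register
// allocator pins Out() to InAt(0)) and is overwritten only when the condition
// holds, which is exactly what cmovcc does.
[[maybe_unused]] static int64_t SelectViaConditionalMove(bool condition,
                                                         int64_t value_true,
                                                         int64_t value_false) {
  int64_t out = value_false;  // Out() == InAt(0): already contains the false value.
  if (condition) {            // cmovcc: write only when the condition code holds.
    out = value_true;
  }
  return out;
}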
2529 
2530 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2531   new (GetGraph()->GetAllocator()) LocationSummary(nop);
2532 }
2533 
2534 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2535   // The environment recording already happened in CodeGenerator::Compile.
2536 }
2537 
2538 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2539   __ subq(CpuRegister(RSP), Immediate(adjustment));
2540   __ cfi().AdjustCFAOffset(adjustment);
2541 }
2542 
2543 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2544   __ addq(CpuRegister(RSP), Immediate(adjustment));
2545   __ cfi().AdjustCFAOffset(-adjustment);
2546 }
2547 
2548 void CodeGeneratorX86_64::GenerateNop() {
2549   __ nop();
2550 }
2551 
2552 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2553   LocationSummary* locations =
2554       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2555   // Handle the long/FP comparisons made in instruction simplification.
2556   switch (cond->InputAt(0)->GetType()) {
2557     case DataType::Type::kInt64:
2558       locations->SetInAt(0, Location::RequiresRegister());
2559       locations->SetInAt(1, Location::Any());
2560       break;
2561     case DataType::Type::kFloat32:
2562     case DataType::Type::kFloat64:
2563       locations->SetInAt(0, Location::RequiresFpuRegister());
2564       locations->SetInAt(1, Location::Any());
2565       break;
2566     default:
2567       locations->SetInAt(0, Location::RequiresRegister());
2568       locations->SetInAt(1, Location::Any());
2569       break;
2570   }
2571   if (!cond->IsEmittedAtUseSite()) {
2572     locations->SetOut(Location::RequiresRegister());
2573   }
2574 }
2575 
2576 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2577   if (cond->IsEmittedAtUseSite()) {
2578     return;
2579   }
2580 
2581   LocationSummary* locations = cond->GetLocations();
2582   Location lhs = locations->InAt(0);
2583   Location rhs = locations->InAt(1);
2584   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2585   NearLabel true_label, false_label;
2586 
2587   switch (cond->InputAt(0)->GetType()) {
2588     default:
2589       // Integer case.
2590 
2591       // Clear output register: setcc only sets the low byte.
2592       __ xorl(reg, reg);
2593 
2594       codegen_->GenerateIntCompare(lhs, rhs);
2595       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2596       return;
2597     case DataType::Type::kInt64:
2598       // Clear output register: setcc only sets the low byte.
2599       __ xorl(reg, reg);
2600 
2601       codegen_->GenerateLongCompare(lhs, rhs);
2602       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2603       return;
2604     case DataType::Type::kFloat32: {
2605       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2606       if (rhs.IsConstant()) {
2607         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2608         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2609       } else if (rhs.IsStackSlot()) {
2610         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2611       } else {
2612         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2613       }
2614       GenerateFPJumps(cond, &true_label, &false_label);
2615       break;
2616     }
2617     case DataType::Type::kFloat64: {
2618       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2619       if (rhs.IsConstant()) {
2620         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2621         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2622       } else if (rhs.IsDoubleStackSlot()) {
2623         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2624       } else {
2625         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2626       }
2627       GenerateFPJumps(cond, &true_label, &false_label);
2628       break;
2629     }
2630   }
2631 
2632   // Convert the jumps into the result.
2633   NearLabel done_label;
2634 
2635   // False case: result = 0.
2636   __ Bind(&false_label);
2637   __ xorl(reg, reg);
2638   __ jmp(&done_label);
2639 
2640   // True case: result = 1.
2641   __ Bind(&true_label);
2642   __ movl(reg, Immediate(1));
2643   __ Bind(&done_label);
2644 }
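
// Illustrative sketch only: why the integer paths above clear the register
// before setcc. setcc writes only the low 8 bits, so without the xor the
// upper bits of whatever the register previously held would survive into the
// materialized boolean.
[[maybe_unused]] static uint32_t MaterializeBoolean(bool condition_holds) {
  uint32_t reg = 0u;                                   // xorl reg, reg: clears all 32 bits.
  reg = (reg & ~0xFFu) | (condition_holds ? 1u : 0u);  // setcc: writes the low byte only.
  return reg;                                          // Guaranteed to be exactly 0 or 1.
}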
2645 
2646 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2647   HandleCondition(comp);
2648 }
2649 
2650 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2651   HandleCondition(comp);
2652 }
2653 
2654 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2655   HandleCondition(comp);
2656 }
2657 
2658 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2659   HandleCondition(comp);
2660 }
2661 
2662 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2663   HandleCondition(comp);
2664 }
2665 
2666 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2667   HandleCondition(comp);
2668 }
2669 
2670 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2671   HandleCondition(comp);
2672 }
2673 
2674 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2675   HandleCondition(comp);
2676 }
2677 
2678 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2679   HandleCondition(comp);
2680 }
2681 
2682 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2683   HandleCondition(comp);
2684 }
2685 
2686 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2687   HandleCondition(comp);
2688 }
2689 
2690 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2691   HandleCondition(comp);
2692 }
2693 
2694 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2695   HandleCondition(comp);
2696 }
2697 
2698 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2699   HandleCondition(comp);
2700 }
2701 
2702 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2703   HandleCondition(comp);
2704 }
2705 
2706 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2707   HandleCondition(comp);
2708 }
2709 
2710 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2711   HandleCondition(comp);
2712 }
2713 
2714 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2715   HandleCondition(comp);
2716 }
2717 
2718 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2719   HandleCondition(comp);
2720 }
2721 
2722 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2723   HandleCondition(comp);
2724 }
2725 
2726 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2727   LocationSummary* locations =
2728       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2729   switch (compare->GetComparisonType()) {
2730     case DataType::Type::kBool:
2731     case DataType::Type::kUint8:
2732     case DataType::Type::kInt8:
2733     case DataType::Type::kUint16:
2734     case DataType::Type::kInt16:
2735     case DataType::Type::kInt32:
2736     case DataType::Type::kUint32:
2737     case DataType::Type::kInt64:
2738     case DataType::Type::kUint64: {
2739       locations->SetInAt(0, Location::RequiresRegister());
2740       locations->SetInAt(1, Location::Any());
2741       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2742       break;
2743     }
2744     case DataType::Type::kFloat32:
2745     case DataType::Type::kFloat64: {
2746       locations->SetInAt(0, Location::RequiresFpuRegister());
2747       locations->SetInAt(1, Location::Any());
2748       locations->SetOut(Location::RequiresRegister());
2749       break;
2750     }
2751     default:
2752       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2753   }
2754 }
2755 
2756 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2757   LocationSummary* locations = compare->GetLocations();
2758   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2759   Location left = locations->InAt(0);
2760   Location right = locations->InAt(1);
2761 
2762   NearLabel less, greater, done;
2763   DataType::Type type = compare->GetComparisonType();
2764   Condition less_cond = kLess;
2765 
2766   switch (type) {
2767     case DataType::Type::kUint32:
2768       less_cond = kBelow;
2769       FALLTHROUGH_INTENDED;
2770     case DataType::Type::kBool:
2771     case DataType::Type::kUint8:
2772     case DataType::Type::kInt8:
2773     case DataType::Type::kUint16:
2774     case DataType::Type::kInt16:
2775     case DataType::Type::kInt32: {
2776       codegen_->GenerateIntCompare(left, right);
2777       break;
2778     }
2779     case DataType::Type::kUint64:
2780       less_cond = kBelow;
2781       FALLTHROUGH_INTENDED;
2782     case DataType::Type::kInt64: {
2783       codegen_->GenerateLongCompare(left, right);
2784       break;
2785     }
2786     case DataType::Type::kFloat32: {
2787       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2788       if (right.IsConstant()) {
2789         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2790         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2791       } else if (right.IsStackSlot()) {
2792         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2793       } else {
2794         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2795       }
2796       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2797       less_cond = kBelow;  //  ucomis{s,d} sets CF
2798       break;
2799     }
2800     case DataType::Type::kFloat64: {
2801       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2802       if (right.IsConstant()) {
2803         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2804         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2805       } else if (right.IsDoubleStackSlot()) {
2806         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2807       } else {
2808         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2809       }
2810       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2811       less_cond = kBelow;  //  ucomis{s,d} sets CF
2812       break;
2813     }
2814     default:
2815       LOG(FATAL) << "Unexpected compare type " << type;
2816   }
2817 
2818   __ movl(out, Immediate(0));
2819   __ j(kEqual, &done);
2820   __ j(less_cond, &less);
2821 
2822   __ Bind(&greater);
2823   __ movl(out, Immediate(1));
2824   __ jmp(&done);
2825 
2826   __ Bind(&less);
2827   __ movl(out, Immediate(-1));
2828 
2829   __ Bind(&done);
2830 }
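
// Illustrative sketch only: the -1/0/1 materialization above, including the
// gt/lt bias applied when a floating point comparison is unordered (at least
// one operand is NaN). ucomis{s,d} reports "unordered", and the code above
// jumps straight to the greater or less label depending on the bias.
[[maybe_unused]] static int32_t ThreeWayCompare(double left, double right, bool gt_bias) {
  if (left != left || right != right) {  // NaN never compares equal to itself.
    return gt_bias ? 1 : -1;             // Unordered: pick the biased result.
  }
  if (left == right) {
    return 0;
  }
  return (left < right) ? -1 : 1;
}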
2831 
2832 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2833   LocationSummary* locations =
2834       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2835   locations->SetOut(Location::ConstantLocation(constant));
2836 }
2837 
2838 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2839   // Will be generated at use site.
2840 }
2841 
2842 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2843   LocationSummary* locations =
2844       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2845   locations->SetOut(Location::ConstantLocation(constant));
2846 }
2847 
2848 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2849   // Will be generated at use site.
2850 }
2851 
2852 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2853   LocationSummary* locations =
2854       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2855   locations->SetOut(Location::ConstantLocation(constant));
2856 }
2857 
2858 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2859   // Will be generated at use site.
2860 }
2861 
2862 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2863   LocationSummary* locations =
2864       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2865   locations->SetOut(Location::ConstantLocation(constant));
2866 }
2867 
2868 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2869   // Will be generated at use site.
2870 }
2871 
2872 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2873   LocationSummary* locations =
2874       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2875   locations->SetOut(Location::ConstantLocation(constant));
2876 }
2877 
2878 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2879     [[maybe_unused]] HDoubleConstant* constant) {
2880   // Will be generated at use site.
2881 }
2882 
2883 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2884   constructor_fence->SetLocations(nullptr);
2885 }
2886 
2887 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2888     [[maybe_unused]] HConstructorFence* constructor_fence) {
2889   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2890 }
2891 
2892 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2893   memory_barrier->SetLocations(nullptr);
2894 }
2895 
2896 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2897   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2898 }
2899 
2900 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2901   ret->SetLocations(nullptr);
2902 }
2903 
2904 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2905   codegen_->GenerateFrameExit();
2906 }
2907 
2908 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2909   LocationSummary* locations =
2910       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2911   SetInForReturnValue(ret, locations);
2912 }
2913 
2914 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2915   switch (ret->InputAt(0)->GetType()) {
2916     case DataType::Type::kReference:
2917     case DataType::Type::kBool:
2918     case DataType::Type::kUint8:
2919     case DataType::Type::kInt8:
2920     case DataType::Type::kUint16:
2921     case DataType::Type::kInt16:
2922     case DataType::Type::kInt32:
2923     case DataType::Type::kInt64:
2924       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2925       break;
2926 
2927     case DataType::Type::kFloat32: {
2928       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2929                 XMM0);
2930       // To simplify callers of an OSR method, we put the return value in both
2931       // floating point and core register.
2932       if (GetGraph()->IsCompilingOsr()) {
2933         __ movd(CpuRegister(RAX), XmmRegister(XMM0));
2934       }
2935       break;
2936     }
2937     case DataType::Type::kFloat64: {
2938       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2939                 XMM0);
2940       // To simplify callers of an OSR method, we put the return value in both
2941       // floating point and core register.
2942       if (GetGraph()->IsCompilingOsr()) {
2943         __ movq(CpuRegister(RAX), XmmRegister(XMM0));
2944       }
2945       break;
2946     }
2947 
2948     default:
2949       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2950   }
2951   codegen_->GenerateFrameExit();
2952 }
2953 
2954 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2955   switch (type) {
2956     case DataType::Type::kReference:
2957     case DataType::Type::kBool:
2958     case DataType::Type::kUint8:
2959     case DataType::Type::kInt8:
2960     case DataType::Type::kUint16:
2961     case DataType::Type::kInt16:
2962     case DataType::Type::kUint32:
2963     case DataType::Type::kInt32:
2964     case DataType::Type::kUint64:
2965     case DataType::Type::kInt64:
2966       return Location::RegisterLocation(RAX);
2967 
2968     case DataType::Type::kVoid:
2969       return Location::NoLocation();
2970 
2971     case DataType::Type::kFloat64:
2972     case DataType::Type::kFloat32:
2973       return Location::FpuRegisterLocation(XMM0);
2974   }
2975 }
2976 
2977 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2978   return Location::RegisterLocation(kMethodRegisterArgument);
2979 }
2980 
2981 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2982   switch (type) {
2983     case DataType::Type::kReference:
2984     case DataType::Type::kBool:
2985     case DataType::Type::kUint8:
2986     case DataType::Type::kInt8:
2987     case DataType::Type::kUint16:
2988     case DataType::Type::kInt16:
2989     case DataType::Type::kInt32: {
2990       uint32_t index = gp_index_++;
2991       stack_index_++;
2992       if (index < calling_convention.GetNumberOfRegisters()) {
2993         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2994       } else {
2995         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2996       }
2997     }
2998 
2999     case DataType::Type::kInt64: {
3000       uint32_t index = gp_index_;
3001       stack_index_ += 2;
3002       if (index < calling_convention.GetNumberOfRegisters()) {
3003         gp_index_ += 1;
3004         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3005       } else {
3006         gp_index_ += 2;
3007         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3008       }
3009     }
3010 
3011     case DataType::Type::kFloat32: {
3012       uint32_t index = float_index_++;
3013       stack_index_++;
3014       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3015         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3016       } else {
3017         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3018       }
3019     }
3020 
3021     case DataType::Type::kFloat64: {
3022       uint32_t index = float_index_++;
3023       stack_index_ += 2;
3024       if (index < calling_convention.GetNumberOfFpuRegisters()) {
3025         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3026       } else {
3027         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3028       }
3029     }
3030 
3031     case DataType::Type::kUint32:
3032     case DataType::Type::kUint64:
3033     case DataType::Type::kVoid:
3034       LOG(FATAL) << "Unexpected parameter type " << type;
3035       UNREACHABLE();
3036   }
3037   return Location::NoLocation();
3038 }
3039 
3040 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3041   DCHECK_NE(type, DataType::Type::kReference);
3042 
3043   Location location = Location::NoLocation();
3044   if (DataType::IsFloatingPointType(type)) {
3045     if (fpr_index_ < kParameterFloatRegistersLength) {
3046       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3047       ++fpr_index_;
3048     }
3049   } else {
3050     // Native ABI uses the same registers as managed, except that the method register RDI
3051     // is a normal argument.
3052     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3053       location = Location::RegisterLocation(
3054           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3055       ++gpr_index_;
3056     }
3057   }
3058   if (location.IsInvalid()) {
3059     if (DataType::Is64BitType(type)) {
3060       location = Location::DoubleStackSlot(stack_offset_);
3061     } else {
3062       location = Location::StackSlot(stack_offset_);
3063     }
3064     stack_offset_ += kFramePointerSize;
3065 
3066     if (for_register_allocation_) {
3067       location = Location::Any();
3068     }
3069   }
3070   return location;
3071 }
3072 
3073 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3074     const {
3075   // We perform conversion to the managed ABI return register after the call if needed.
3076   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3077   return dex_calling_convention.GetReturnLocation(type);
3078 }
3079 
3080 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3081   // Pass the method in the hidden argument RAX.
3082   return Location::RegisterLocation(RAX);
3083 }
3084 
3085 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3086   // The trampoline uses the same calling convention as the dex calling convention,
3087   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3088   // the method_idx.
3089   HandleInvoke(invoke);
3090 }
3091 
3092 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3093   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3094 }
3095 
3096 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3097   // Explicit clinit checks triggered by static invokes must have been pruned by
3098   // art::PrepareForRegisterAllocation.
3099   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3100 
3101   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3102   if (intrinsic.TryDispatch(invoke)) {
3103     return;
3104   }
3105 
3106   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3107     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3108         /*for_register_allocation=*/ true);
3109     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3110     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3111   } else {
3112     HandleInvoke(invoke);
3113   }
3114 }
3115 
3116 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3117   if (invoke->GetLocations()->Intrinsified()) {
3118     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3119     intrinsic.Dispatch(invoke);
3120     return true;
3121   }
3122   return false;
3123 }
3124 
3125 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3126   // Explicit clinit checks triggered by static invokes must have been pruned by
3127   // art::PrepareForRegisterAllocation.
3128   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3129 
3130   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3131     return;
3132   }
3133 
3134   LocationSummary* locations = invoke->GetLocations();
3135   codegen_->GenerateStaticOrDirectCall(
3136       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3137 }
3138 
3139 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3140   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3141   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3142 }
3143 
3144 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3145   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3146   if (intrinsic.TryDispatch(invoke)) {
3147     return;
3148   }
3149 
3150   HandleInvoke(invoke);
3151 }
3152 
3153 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3154   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3155     return;
3156   }
3157 
3158   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3159   DCHECK(!codegen_->IsLeafMethod());
3160 }
3161 
3162 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3163   HandleInvoke(invoke);
3164   // Add the hidden argument.
3165   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3166     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3167                                     Location::RegisterLocation(RAX));
3168   }
3169   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3170 }
3171 
3172 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3173                                                         CpuRegister klass) {
3174   DCHECK_EQ(RDI, klass.AsRegister());
3175   if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3176     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3177     DCHECK(info != nullptr);
3178     InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3179         info, GetCompilerOptions(), instruction->AsInvoke());
3180     if (cache != nullptr) {
3181       uint64_t address = reinterpret_cast64<uint64_t>(cache);
3182       NearLabel done;
3183       __ movq(CpuRegister(TMP), Immediate(address));
3184       // Fast path for a monomorphic cache.
3185       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3186       __ j(kEqual, &done);
3187       GenerateInvokeRuntime(
3188           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3189       __ Bind(&done);
3190     } else {
3191       // This is unexpected, but we don't guarantee stable compilation across
3192       // JIT runs so just warn about it.
3193       ScopedObjectAccess soa(Thread::Current());
3194       LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3195     }
3196   }
3197 }
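
// Illustrative sketch only (hypothetical names, not ART APIs): the shape of
// the monomorphic fast path emitted above. The runtime update entrypoint is
// invoked only when the receiver's class does not match the first class
// recorded in the inline cache.
[[maybe_unused]] static bool InlineCacheNeedsRuntimeUpdate(const void* first_cached_class,
                                                           const void* receiver_class) {
  // cmpl(Address(cache, ClassesOffset()), klass) followed by j(kEqual, &done).
  return first_cached_class != receiver_class;
}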
3198 
3199 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3200   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3201   LocationSummary* locations = invoke->GetLocations();
3202   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3203   Location receiver = locations->InAt(0);
3204   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3205 
3206   if (receiver.IsStackSlot()) {
3207     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3208     // /* HeapReference<Class> */ temp = temp->klass_
3209     __ movl(temp, Address(temp, class_offset));
3210   } else {
3211     // /* HeapReference<Class> */ temp = receiver->klass_
3212     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3213   }
3214   codegen_->MaybeRecordImplicitNullCheck(invoke);
3215   // Instead of simply (possibly) unpoisoning `temp` here, we should
3216   // emit a read barrier for the previous class reference load.
3217   // However this is not required in practice, as this is an
3218   // intermediate/temporary reference and because the current
3219   // concurrent copying collector keeps the from-space memory
3220   // intact/accessible until the end of the marking phase (the
3221   // concurrent copying collector may not in the future).
3222   __ MaybeUnpoisonHeapReference(temp);
3223 
3224   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3225 
3226   if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3227       invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3228     Location hidden_reg = locations->GetTemp(1);
3229     // Set the hidden argument. It is safe to do this here, as RAX
3230     // won't be modified thereafter, before the `call` instruction.
3231     // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3232     DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3233     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3234   }
3235 
3236   // temp = temp->GetAddressOfIMT()
3237   __ movq(temp,
3238       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3239   // Compute the offset of the IMT entry for this interface method.
3240   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3241       invoke->GetImtIndex(), kX86_64PointerSize));
3242   // temp = temp->GetImtEntryAt(method_offset);
3243   __ movq(temp, Address(temp, method_offset));
3244   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3245     // We pass the method from the IMT in case of a conflict. This will ensure
3246     // we go into the runtime to resolve the actual method.
3247     Location hidden_reg = locations->GetTemp(1);
3248     __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3249   }
3250   // call temp->GetEntryPoint();
3251   __ call(Address(
3252       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3253 
3254   DCHECK(!codegen_->IsLeafMethod());
3255   codegen_->RecordPcInfo(invoke);
3256 }
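
// Illustrative sketch only, using hypothetical stand-in types rather than the
// ART mirror classes: the dependent-load chain behind the interface dispatch
// emitted above (receiver class -> IMT pointer -> IMT entry -> entry point).
struct FakeArtMethod { void (*entry_point)(); };
struct FakeClass { FakeArtMethod** imt; };
struct FakeObject { FakeClass* klass; };

[[maybe_unused]] static void DispatchViaImt(FakeObject* receiver, size_t imt_index) {
  FakeClass* klass = receiver->klass;             // movl temp, [receiver + class_offset]
  FakeArtMethod* target = klass->imt[imt_index];  // movq temp, [temp + ImtPtrOffset]; movq temp, [temp + method_offset]
  target->entry_point();                          // call [temp + entry_point_offset]
}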
3257 
3258 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3259   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3260   if (intrinsic.TryDispatch(invoke)) {
3261     return;
3262   }
3263   HandleInvoke(invoke);
3264 }
3265 
3266 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3267   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3268     return;
3269   }
3270   codegen_->GenerateInvokePolymorphicCall(invoke);
3271 }
3272 
3273 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3274   HandleInvoke(invoke);
3275 }
3276 
3277 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3278   codegen_->GenerateInvokeCustomCall(invoke);
3279 }
3280 
3281 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3282   LocationSummary* locations =
3283       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3284   switch (neg->GetResultType()) {
3285     case DataType::Type::kInt32:
3286     case DataType::Type::kInt64:
3287       locations->SetInAt(0, Location::RequiresRegister());
3288       locations->SetOut(Location::SameAsFirstInput());
3289       break;
3290 
3291     case DataType::Type::kFloat32:
3292     case DataType::Type::kFloat64:
3293       locations->SetInAt(0, Location::RequiresFpuRegister());
3294       locations->SetOut(Location::SameAsFirstInput());
3295       locations->AddTemp(Location::RequiresFpuRegister());
3296       break;
3297 
3298     default:
3299       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3300   }
3301 }
3302 
3303 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3304   LocationSummary* locations = neg->GetLocations();
3305   Location out = locations->Out();
3306   Location in = locations->InAt(0);
3307   switch (neg->GetResultType()) {
3308     case DataType::Type::kInt32:
3309       DCHECK(in.IsRegister());
3310       DCHECK(in.Equals(out));
3311       __ negl(out.AsRegister<CpuRegister>());
3312       break;
3313 
3314     case DataType::Type::kInt64:
3315       DCHECK(in.IsRegister());
3316       DCHECK(in.Equals(out));
3317       __ negq(out.AsRegister<CpuRegister>());
3318       break;
3319 
3320     case DataType::Type::kFloat32: {
3321       DCHECK(in.Equals(out));
3322       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3323       // Implement float negation with an exclusive or with value
3324       // 0x80000000 (mask for bit 31, representing the sign of a
3325       // single-precision floating-point number).
3326       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3327       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3328       break;
3329     }
3330 
3331     case DataType::Type::kFloat64: {
3332       DCHECK(in.Equals(out));
3333       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3334       // Implement double negation with an exclusive or with value
3335       // 0x8000000000000000 (mask for bit 63, representing the sign of
3336       // a double-precision floating-point number).
3337       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3338       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3339       break;
3340     }
3341 
3342     default:
3343       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3344   }
3345 }
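
// Illustrative sketch only, assuming IEEE-754 binary32/binary64 layouts: the
// sign-bit flips performed above by xorps/xorpd with the 0x80000000 and
// 0x8000000000000000 literals, expressed on the raw bit patterns.
[[maybe_unused]] static uint32_t FlipFloatSignBit(uint32_t float_bits) {
  return float_bits ^ 0x80000000u;  // Toggle bit 31, the binary32 sign bit.
}

[[maybe_unused]] static uint64_t FlipDoubleSignBit(uint64_t double_bits) {
  return double_bits ^ UINT64_C(0x8000000000000000);  // Toggle bit 63, the binary64 sign bit.
}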
3346 
3347 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3348   LocationSummary* locations =
3349       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3350   DataType::Type result_type = conversion->GetResultType();
3351   DataType::Type input_type = conversion->GetInputType();
3352   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3353       << input_type << " -> " << result_type;
3354 
3355   switch (result_type) {
3356     case DataType::Type::kUint8:
3357     case DataType::Type::kInt8:
3358     case DataType::Type::kUint16:
3359     case DataType::Type::kInt16:
3360       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3361       locations->SetInAt(0, Location::Any());
3362       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3363       break;
3364 
3365     case DataType::Type::kInt32:
3366       switch (input_type) {
3367         case DataType::Type::kInt64:
3368           locations->SetInAt(0, Location::Any());
3369           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3370           break;
3371 
3372         case DataType::Type::kFloat32:
3373           locations->SetInAt(0, Location::RequiresFpuRegister());
3374           locations->SetOut(Location::RequiresRegister());
3375           break;
3376 
3377         case DataType::Type::kFloat64:
3378           locations->SetInAt(0, Location::RequiresFpuRegister());
3379           locations->SetOut(Location::RequiresRegister());
3380           break;
3381 
3382         default:
3383           LOG(FATAL) << "Unexpected type conversion from " << input_type
3384                      << " to " << result_type;
3385       }
3386       break;
3387 
3388     case DataType::Type::kInt64:
3389       switch (input_type) {
3390         case DataType::Type::kBool:
3391         case DataType::Type::kUint8:
3392         case DataType::Type::kInt8:
3393         case DataType::Type::kUint16:
3394         case DataType::Type::kInt16:
3395         case DataType::Type::kInt32:
3396           // TODO: We would benefit from a (to-be-implemented)
3397           // Location::RegisterOrStackSlot requirement for this input.
3398           locations->SetInAt(0, Location::RequiresRegister());
3399           locations->SetOut(Location::RequiresRegister());
3400           break;
3401 
3402         case DataType::Type::kFloat32:
3403           locations->SetInAt(0, Location::RequiresFpuRegister());
3404           locations->SetOut(Location::RequiresRegister());
3405           break;
3406 
3407         case DataType::Type::kFloat64:
3408           locations->SetInAt(0, Location::RequiresFpuRegister());
3409           locations->SetOut(Location::RequiresRegister());
3410           break;
3411 
3412         default:
3413           LOG(FATAL) << "Unexpected type conversion from " << input_type
3414                      << " to " << result_type;
3415       }
3416       break;
3417 
3418     case DataType::Type::kFloat32:
3419       switch (input_type) {
3420         case DataType::Type::kBool:
3421         case DataType::Type::kUint8:
3422         case DataType::Type::kInt8:
3423         case DataType::Type::kUint16:
3424         case DataType::Type::kInt16:
3425         case DataType::Type::kInt32:
3426           locations->SetInAt(0, Location::Any());
3427           locations->SetOut(Location::RequiresFpuRegister());
3428           break;
3429 
3430         case DataType::Type::kInt64:
3431           locations->SetInAt(0, Location::Any());
3432           locations->SetOut(Location::RequiresFpuRegister());
3433           break;
3434 
3435         case DataType::Type::kFloat64:
3436           locations->SetInAt(0, Location::Any());
3437           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3438           break;
3439 
3440         default:
3441           LOG(FATAL) << "Unexpected type conversion from " << input_type
3442                      << " to " << result_type;
3443       }
3444       break;
3445 
3446     case DataType::Type::kFloat64:
3447       switch (input_type) {
3448         case DataType::Type::kBool:
3449         case DataType::Type::kUint8:
3450         case DataType::Type::kInt8:
3451         case DataType::Type::kUint16:
3452         case DataType::Type::kInt16:
3453         case DataType::Type::kInt32:
3454           locations->SetInAt(0, Location::Any());
3455           locations->SetOut(Location::RequiresFpuRegister());
3456           break;
3457 
3458         case DataType::Type::kInt64:
3459           locations->SetInAt(0, Location::Any());
3460           locations->SetOut(Location::RequiresFpuRegister());
3461           break;
3462 
3463         case DataType::Type::kFloat32:
3464           locations->SetInAt(0, Location::Any());
3465           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3466           break;
3467 
3468         default:
3469           LOG(FATAL) << "Unexpected type conversion from " << input_type
3470                      << " to " << result_type;
3471       }
3472       break;
3473 
3474     default:
3475       LOG(FATAL) << "Unexpected type conversion from " << input_type
3476                  << " to " << result_type;
3477   }
3478 }
3479 
3480 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3481   LocationSummary* locations = conversion->GetLocations();
3482   Location out = locations->Out();
3483   Location in = locations->InAt(0);
3484   DataType::Type result_type = conversion->GetResultType();
3485   DataType::Type input_type = conversion->GetInputType();
3486   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3487       << input_type << " -> " << result_type;
3488   switch (result_type) {
3489     case DataType::Type::kUint8:
3490       switch (input_type) {
3491         case DataType::Type::kInt8:
3492         case DataType::Type::kUint16:
3493         case DataType::Type::kInt16:
3494         case DataType::Type::kInt32:
3495         case DataType::Type::kInt64:
3496           if (in.IsRegister()) {
3497             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3498           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3499             __ movzxb(out.AsRegister<CpuRegister>(),
3500                       Address(CpuRegister(RSP), in.GetStackIndex()));
3501           } else {
3502             __ movl(out.AsRegister<CpuRegister>(),
3503                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3504           }
3505           break;
3506 
3507         default:
3508           LOG(FATAL) << "Unexpected type conversion from " << input_type
3509                      << " to " << result_type;
3510       }
3511       break;
3512 
3513     case DataType::Type::kInt8:
3514       switch (input_type) {
3515         case DataType::Type::kUint8:
3516         case DataType::Type::kUint16:
3517         case DataType::Type::kInt16:
3518         case DataType::Type::kInt32:
3519         case DataType::Type::kInt64:
3520           if (in.IsRegister()) {
3521             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3522           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3523             __ movsxb(out.AsRegister<CpuRegister>(),
3524                       Address(CpuRegister(RSP), in.GetStackIndex()));
3525           } else {
3526             __ movl(out.AsRegister<CpuRegister>(),
3527                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3528           }
3529           break;
3530 
3531         default:
3532           LOG(FATAL) << "Unexpected type conversion from " << input_type
3533                      << " to " << result_type;
3534       }
3535       break;
3536 
3537     case DataType::Type::kUint16:
3538       switch (input_type) {
3539         case DataType::Type::kInt8:
3540         case DataType::Type::kInt16:
3541         case DataType::Type::kInt32:
3542         case DataType::Type::kInt64:
3543           if (in.IsRegister()) {
3544             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3545           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3546             __ movzxw(out.AsRegister<CpuRegister>(),
3547                       Address(CpuRegister(RSP), in.GetStackIndex()));
3548           } else {
3549             __ movl(out.AsRegister<CpuRegister>(),
3550                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3551           }
3552           break;
3553 
3554         default:
3555           LOG(FATAL) << "Unexpected type conversion from " << input_type
3556                      << " to " << result_type;
3557       }
3558       break;
3559 
3560     case DataType::Type::kInt16:
3561       switch (input_type) {
3562         case DataType::Type::kUint16:
3563         case DataType::Type::kInt32:
3564         case DataType::Type::kInt64:
3565           if (in.IsRegister()) {
3566             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3567           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3568             __ movsxw(out.AsRegister<CpuRegister>(),
3569                       Address(CpuRegister(RSP), in.GetStackIndex()));
3570           } else {
3571             __ movl(out.AsRegister<CpuRegister>(),
3572                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3573           }
3574           break;
3575 
3576         default:
3577           LOG(FATAL) << "Unexpected type conversion from " << input_type
3578                      << " to " << result_type;
3579       }
3580       break;
3581 
3582     case DataType::Type::kInt32:
3583       switch (input_type) {
3584         case DataType::Type::kInt64:
3585           if (in.IsRegister()) {
3586             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3587           } else if (in.IsDoubleStackSlot()) {
3588             __ movl(out.AsRegister<CpuRegister>(),
3589                     Address(CpuRegister(RSP), in.GetStackIndex()));
3590           } else {
3591             DCHECK(in.IsConstant());
3592             DCHECK(in.GetConstant()->IsLongConstant());
3593             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3594             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3595           }
3596           break;
3597 
3598         case DataType::Type::kFloat32: {
3599           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3600           CpuRegister output = out.AsRegister<CpuRegister>();
3601           NearLabel done, nan;
3602 
3603           __ movl(output, Immediate(kPrimIntMax));
3604           // if input >= (float)INT_MAX goto done
3605           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3606           __ j(kAboveEqual, &done);
3607           // if input == NaN goto nan
3608           __ j(kUnordered, &nan);
3609           // output = float-to-int-truncate(input)
3610           __ cvttss2si(output, input, false);
3611           __ jmp(&done);
3612           __ Bind(&nan);
3613           //  output = 0
3614           __ xorl(output, output);
3615           __ Bind(&done);
3616           break;
3617         }
3618 
3619         case DataType::Type::kFloat64: {
3620           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3621           CpuRegister output = out.AsRegister<CpuRegister>();
3622           NearLabel done, nan;
3623 
3624           __ movl(output, Immediate(kPrimIntMax));
3625           // if input >= (double)INT_MAX goto done
3626           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3627           __ j(kAboveEqual, &done);
3628           // if input == NaN goto nan
3629           __ j(kUnordered, &nan);
3630           // output = double-to-int-truncate(input)
3631           __ cvttsd2si(output, input);
3632           __ jmp(&done);
3633           __ Bind(&nan);
3634           //  output = 0
3635           __ xorl(output, output);
3636           __ Bind(&done);
3637           break;
3638         }
3639 
3640         default:
3641           LOG(FATAL) << "Unexpected type conversion from " << input_type
3642                      << " to " << result_type;
3643       }
3644       break;
3645 
3646     case DataType::Type::kInt64:
3647       DCHECK(out.IsRegister());
3648       switch (input_type) {
3649         case DataType::Type::kBool:
3650         case DataType::Type::kUint8:
3651         case DataType::Type::kInt8:
3652         case DataType::Type::kUint16:
3653         case DataType::Type::kInt16:
3654         case DataType::Type::kInt32:
3655           DCHECK(in.IsRegister());
3656           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3657           break;
3658 
3659         case DataType::Type::kFloat32: {
3660           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3661           CpuRegister output = out.AsRegister<CpuRegister>();
3662           NearLabel done, nan;
3663 
3664           codegen_->Load64BitValue(output, kPrimLongMax);
3665           // if input >= (float)LONG_MAX goto done
3666           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3667           __ j(kAboveEqual, &done);
3668           // if input == NaN goto nan
3669           __ j(kUnordered, &nan);
3670           // output = float-to-long-truncate(input)
3671           __ cvttss2si(output, input, true);
3672           __ jmp(&done);
3673           __ Bind(&nan);
3674           //  output = 0
3675           __ xorl(output, output);
3676           __ Bind(&done);
3677           break;
3678         }
3679 
3680         case DataType::Type::kFloat64: {
3681           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3682           CpuRegister output = out.AsRegister<CpuRegister>();
3683           NearLabel done, nan;
3684 
3685           codegen_->Load64BitValue(output, kPrimLongMax);
3686           // if input >= (double)LONG_MAX goto done
3687           __ comisd(input, codegen_->LiteralDoubleAddress(
3688                 static_cast<double>(kPrimLongMax)));
3689           __ j(kAboveEqual, &done);
3690           // if input == NaN goto nan
3691           __ j(kUnordered, &nan);
3692           // output = double-to-long-truncate(input)
3693           __ cvttsd2si(output, input, true);
3694           __ jmp(&done);
3695           __ Bind(&nan);
3696           //  output = 0
3697           __ xorl(output, output);
3698           __ Bind(&done);
3699           break;
3700         }
3701 
3702         default:
3703           LOG(FATAL) << "Unexpected type conversion from " << input_type
3704                      << " to " << result_type;
3705       }
3706       break;
3707 
3708     case DataType::Type::kFloat32:
3709       switch (input_type) {
3710         case DataType::Type::kBool:
3711         case DataType::Type::kUint8:
3712         case DataType::Type::kInt8:
3713         case DataType::Type::kUint16:
3714         case DataType::Type::kInt16:
3715         case DataType::Type::kInt32:
3716           if (in.IsRegister()) {
3717             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3718           } else if (in.IsConstant()) {
3719             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3720             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3721             codegen_->Load32BitValue(dest, static_cast<float>(v));
3722           } else {
3723             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3724                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3725           }
3726           break;
3727 
3728         case DataType::Type::kInt64:
3729           if (in.IsRegister()) {
3730             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3731           } else if (in.IsConstant()) {
3732             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3733             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3734             codegen_->Load32BitValue(dest, static_cast<float>(v));
3735           } else {
3736             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3737                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3738           }
3739           break;
3740 
3741         case DataType::Type::kFloat64:
3742           if (in.IsFpuRegister()) {
3743             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3744           } else if (in.IsConstant()) {
3745             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3746             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3747             codegen_->Load32BitValue(dest, static_cast<float>(v));
3748           } else {
3749             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3750                         Address(CpuRegister(RSP), in.GetStackIndex()));
3751           }
3752           break;
3753 
3754         default:
3755           LOG(FATAL) << "Unexpected type conversion from " << input_type
3756                      << " to " << result_type;
3757       }
3758       break;
3759 
3760     case DataType::Type::kFloat64:
3761       switch (input_type) {
3762         case DataType::Type::kBool:
3763         case DataType::Type::kUint8:
3764         case DataType::Type::kInt8:
3765         case DataType::Type::kUint16:
3766         case DataType::Type::kInt16:
3767         case DataType::Type::kInt32:
3768           if (in.IsRegister()) {
3769             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3770           } else if (in.IsConstant()) {
3771             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3772             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3773             codegen_->Load64BitValue(dest, static_cast<double>(v));
3774           } else {
3775             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3776                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3777           }
3778           break;
3779 
3780         case DataType::Type::kInt64:
3781           if (in.IsRegister()) {
3782             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3783           } else if (in.IsConstant()) {
3784             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3785             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3786             codegen_->Load64BitValue(dest, static_cast<double>(v));
3787           } else {
3788             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3789                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3790           }
3791           break;
3792 
3793         case DataType::Type::kFloat32:
3794           if (in.IsFpuRegister()) {
3795             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3796           } else if (in.IsConstant()) {
3797             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3798             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3799             codegen_->Load64BitValue(dest, static_cast<double>(v));
3800           } else {
3801             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3802                         Address(CpuRegister(RSP), in.GetStackIndex()));
3803           }
3804           break;
3805 
3806         default:
3807           LOG(FATAL) << "Unexpected type conversion from " << input_type
3808                      << " to " << result_type;
3809       }
3810       break;
3811 
3812     default:
3813       LOG(FATAL) << "Unexpected type conversion from " << input_type
3814                  << " to " << result_type;
3815   }
3816 }
3817 
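// Illustrative sketch only (hypothetical helper, not used by this code generator): the
// comiss/cvttss2si sequence emitted above for float-to-int implements Java's saturating
// narrowing conversion. NaN maps to 0, values at or above INT_MAX saturate to INT_MAX,
// and cvttss2si's 0x80000000 "integer indefinite" result provides saturation to INT_MIN.
static inline int32_t SaturatingFloatToInt32Sketch(float in) {
  if (in != in) {
    return 0;                                    // NaN is the only value not equal to itself.
  }
  if (in >= static_cast<float>(kPrimIntMax)) {
    return kPrimIntMax;                          // kAboveEqual branch keeps the preloaded INT_MAX.
  }
  if (in < static_cast<float>(-kPrimIntMax - 1)) {
    return -kPrimIntMax - 1;                     // cvttss2si returns 0x80000000 for out-of-range lows.
  }
  return static_cast<int32_t>(in);               // In-range values truncate toward zero.
}
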
3818 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3819   LocationSummary* locations =
3820       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3821   switch (add->GetResultType()) {
3822     case DataType::Type::kInt32: {
3823       locations->SetInAt(0, Location::RequiresRegister());
3824       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3825       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3826       break;
3827     }
3828 
3829     case DataType::Type::kInt64: {
3830       locations->SetInAt(0, Location::RequiresRegister());
3831       // We can use a leaq or addq if the constant can fit in an immediate.
3832       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3833       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3834       break;
3835     }
3836 
3837     case DataType::Type::kFloat64:
3838     case DataType::Type::kFloat32: {
3839       locations->SetInAt(0, Location::RequiresFpuRegister());
3840       locations->SetInAt(1, Location::Any());
3841       locations->SetOut(Location::SameAsFirstInput());
3842       break;
3843     }
3844 
3845     default:
3846       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3847   }
3848 }
3849 
3850 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3851   LocationSummary* locations = add->GetLocations();
3852   Location first = locations->InAt(0);
3853   Location second = locations->InAt(1);
3854   Location out = locations->Out();
3855 
3856   switch (add->GetResultType()) {
3857     case DataType::Type::kInt32: {
3858       if (second.IsRegister()) {
3859         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3860           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3861         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3862           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3863         } else {
3864           __ leal(out.AsRegister<CpuRegister>(), Address(
3865               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3866         }
3867       } else if (second.IsConstant()) {
3868         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3869           __ addl(out.AsRegister<CpuRegister>(),
3870                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3871         } else {
3872           __ leal(out.AsRegister<CpuRegister>(), Address(
3873               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3874         }
3875       } else {
3876         DCHECK(first.Equals(locations->Out()));
3877         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3878       }
3879       break;
3880     }
3881 
3882     case DataType::Type::kInt64: {
3883       if (second.IsRegister()) {
3884         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3885           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3886         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3887           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3888         } else {
3889           __ leaq(out.AsRegister<CpuRegister>(), Address(
3890               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3891         }
3892       } else {
3893         DCHECK(second.IsConstant());
3894         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3895         int32_t int32_value = Low32Bits(value);
3896         DCHECK_EQ(int32_value, value);
3897         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3898           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3899         } else {
3900           __ leaq(out.AsRegister<CpuRegister>(), Address(
3901               first.AsRegister<CpuRegister>(), int32_value));
3902         }
3903       }
3904       break;
3905     }
3906 
3907     case DataType::Type::kFloat32: {
3908       if (second.IsFpuRegister()) {
3909         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3910       } else if (second.IsConstant()) {
3911         __ addss(first.AsFpuRegister<XmmRegister>(),
3912                  codegen_->LiteralFloatAddress(
3913                      second.GetConstant()->AsFloatConstant()->GetValue()));
3914       } else {
3915         DCHECK(second.IsStackSlot());
3916         __ addss(first.AsFpuRegister<XmmRegister>(),
3917                  Address(CpuRegister(RSP), second.GetStackIndex()));
3918       }
3919       break;
3920     }
3921 
3922     case DataType::Type::kFloat64: {
3923       if (second.IsFpuRegister()) {
3924         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3925       } else if (second.IsConstant()) {
3926         __ addsd(first.AsFpuRegister<XmmRegister>(),
3927                  codegen_->LiteralDoubleAddress(
3928                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3929       } else {
3930         DCHECK(second.IsDoubleStackSlot());
3931         __ addsd(first.AsFpuRegister<XmmRegister>(),
3932                  Address(CpuRegister(RSP), second.GetStackIndex()));
3933       }
3934       break;
3935     }
3936 
3937     default:
3938       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3939   }
3940 }
3941 
3942 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3943   LocationSummary* locations =
3944       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3945   switch (sub->GetResultType()) {
3946     case DataType::Type::kInt32: {
3947       locations->SetInAt(0, Location::RequiresRegister());
3948       locations->SetInAt(1, Location::Any());
3949       locations->SetOut(Location::SameAsFirstInput());
3950       break;
3951     }
3952     case DataType::Type::kInt64: {
3953       locations->SetInAt(0, Location::RequiresRegister());
3954       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3955       locations->SetOut(Location::SameAsFirstInput());
3956       break;
3957     }
3958     case DataType::Type::kFloat32:
3959     case DataType::Type::kFloat64: {
3960       locations->SetInAt(0, Location::RequiresFpuRegister());
3961       locations->SetInAt(1, Location::Any());
3962       locations->SetOut(Location::SameAsFirstInput());
3963       break;
3964     }
3965     default:
3966       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3967   }
3968 }
3969 
3970 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3971   LocationSummary* locations = sub->GetLocations();
3972   Location first = locations->InAt(0);
3973   Location second = locations->InAt(1);
3974   DCHECK(first.Equals(locations->Out()));
3975   switch (sub->GetResultType()) {
3976     case DataType::Type::kInt32: {
3977       if (second.IsRegister()) {
3978         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3979       } else if (second.IsConstant()) {
3980         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3981         __ subl(first.AsRegister<CpuRegister>(), imm);
3982       } else {
3983         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3984       }
3985       break;
3986     }
3987     case DataType::Type::kInt64: {
3988       if (second.IsConstant()) {
3989         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3990         DCHECK(IsInt<32>(value));
3991         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3992       } else {
3993         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3994       }
3995       break;
3996     }
3997 
3998     case DataType::Type::kFloat32: {
3999       if (second.IsFpuRegister()) {
4000         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4001       } else if (second.IsConstant()) {
4002         __ subss(first.AsFpuRegister<XmmRegister>(),
4003                  codegen_->LiteralFloatAddress(
4004                      second.GetConstant()->AsFloatConstant()->GetValue()));
4005       } else {
4006         DCHECK(second.IsStackSlot());
4007         __ subss(first.AsFpuRegister<XmmRegister>(),
4008                  Address(CpuRegister(RSP), second.GetStackIndex()));
4009       }
4010       break;
4011     }
4012 
4013     case DataType::Type::kFloat64: {
4014       if (second.IsFpuRegister()) {
4015         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4016       } else if (second.IsConstant()) {
4017         __ subsd(first.AsFpuRegister<XmmRegister>(),
4018                  codegen_->LiteralDoubleAddress(
4019                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4020       } else {
4021         DCHECK(second.IsDoubleStackSlot());
4022         __ subsd(first.AsFpuRegister<XmmRegister>(),
4023                  Address(CpuRegister(RSP), second.GetStackIndex()));
4024       }
4025       break;
4026     }
4027 
4028     default:
4029       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4030   }
4031 }
4032 
4033 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4034   LocationSummary* locations =
4035       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4036   switch (mul->GetResultType()) {
4037     case DataType::Type::kInt32: {
4038       locations->SetInAt(0, Location::RequiresRegister());
4039       locations->SetInAt(1, Location::Any());
4040       if (mul->InputAt(1)->IsIntConstant()) {
4041         // Can use 3 operand multiply.
4042         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4043       } else {
4044         locations->SetOut(Location::SameAsFirstInput());
4045       }
4046       break;
4047     }
4048     case DataType::Type::kInt64: {
4049       locations->SetInAt(0, Location::RequiresRegister());
4050       locations->SetInAt(1, Location::Any());
4051       if (mul->InputAt(1)->IsLongConstant() &&
4052           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4053         // Can use 3 operand multiply.
4054         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4055       } else {
4056         locations->SetOut(Location::SameAsFirstInput());
4057       }
4058       break;
4059     }
4060     case DataType::Type::kFloat32:
4061     case DataType::Type::kFloat64: {
4062       locations->SetInAt(0, Location::RequiresFpuRegister());
4063       locations->SetInAt(1, Location::Any());
4064       locations->SetOut(Location::SameAsFirstInput());
4065       break;
4066     }
4067 
4068     default:
4069       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4070   }
4071 }
4072 
4073 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4074   LocationSummary* locations = mul->GetLocations();
4075   Location first = locations->InAt(0);
4076   Location second = locations->InAt(1);
4077   Location out = locations->Out();
4078   switch (mul->GetResultType()) {
4079     case DataType::Type::kInt32:
4080       // The constant may have ended up in a register, so test explicitly to avoid
4081       // problems where the output may not be the same as the first operand.
4082       if (mul->InputAt(1)->IsIntConstant()) {
4083         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4084         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4085       } else if (second.IsRegister()) {
4086         DCHECK(first.Equals(out));
4087         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4088       } else {
4089         DCHECK(first.Equals(out));
4090         DCHECK(second.IsStackSlot());
4091         __ imull(first.AsRegister<CpuRegister>(),
4092                  Address(CpuRegister(RSP), second.GetStackIndex()));
4093       }
4094       break;
4095     case DataType::Type::kInt64: {
4096       // The constant may have ended up in a register, so test explicitly to avoid
4097       // problems where the output may not be the same as the first operand.
4098       if (mul->InputAt(1)->IsLongConstant()) {
4099         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4100         if (IsInt<32>(value)) {
4101           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4102                    Immediate(static_cast<int32_t>(value)));
4103         } else {
4104           // Have to use the constant area.
4105           DCHECK(first.Equals(out));
4106           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4107         }
4108       } else if (second.IsRegister()) {
4109         DCHECK(first.Equals(out));
4110         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4111       } else {
4112         DCHECK(second.IsDoubleStackSlot());
4113         DCHECK(first.Equals(out));
4114         __ imulq(first.AsRegister<CpuRegister>(),
4115                  Address(CpuRegister(RSP), second.GetStackIndex()));
4116       }
4117       break;
4118     }
4119 
4120     case DataType::Type::kFloat32: {
4121       DCHECK(first.Equals(out));
4122       if (second.IsFpuRegister()) {
4123         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4124       } else if (second.IsConstant()) {
4125         __ mulss(first.AsFpuRegister<XmmRegister>(),
4126                  codegen_->LiteralFloatAddress(
4127                      second.GetConstant()->AsFloatConstant()->GetValue()));
4128       } else {
4129         DCHECK(second.IsStackSlot());
4130         __ mulss(first.AsFpuRegister<XmmRegister>(),
4131                  Address(CpuRegister(RSP), second.GetStackIndex()));
4132       }
4133       break;
4134     }
4135 
4136     case DataType::Type::kFloat64: {
4137       DCHECK(first.Equals(out));
4138       if (second.IsFpuRegister()) {
4139         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4140       } else if (second.IsConstant()) {
4141         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4142                  codegen_->LiteralDoubleAddress(
4143                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4144       } else {
4145         DCHECK(second.IsDoubleStackSlot());
4146         __ mulsd(first.AsFpuRegister<XmmRegister>(),
4147                  Address(CpuRegister(RSP), second.GetStackIndex()));
4148       }
4149       break;
4150     }
4151 
4152     default:
4153       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4154   }
4155 }
4156 
4157 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4158                                                      uint32_t stack_adjustment, bool is_float) {
4159   if (source.IsStackSlot()) {
4160     DCHECK(is_float);
4161     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4162   } else if (source.IsDoubleStackSlot()) {
4163     DCHECK(!is_float);
4164     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4165   } else {
4166     // Write the value to the temporary location on the stack and load to FP stack.
4167     if (is_float) {
4168       Location stack_temp = Location::StackSlot(temp_offset);
4169       codegen_->Move(stack_temp, source);
4170       __ flds(Address(CpuRegister(RSP), temp_offset));
4171     } else {
4172       Location stack_temp = Location::DoubleStackSlot(temp_offset);
4173       codegen_->Move(stack_temp, source);
4174       __ fldl(Address(CpuRegister(RSP), temp_offset));
4175     }
4176   }
4177 }
4178 
4179 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
4180   DataType::Type type = rem->GetResultType();
4181   bool is_float = type == DataType::Type::kFloat32;
4182   size_t elem_size = DataType::Size(type);
4183   LocationSummary* locations = rem->GetLocations();
4184   Location first = locations->InAt(0);
4185   Location second = locations->InAt(1);
4186   Location out = locations->Out();
4187 
4188   // Create stack space for 2 elements.
4189   // TODO: enhance register allocator to ask for stack temporaries.
4190   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4191 
4192   // Load the values to the FP stack in reverse order, using temporaries if needed.
4193   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4194   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4195 
4196   // Loop doing FPREM until we stabilize.
4197   NearLabel retry;
4198   __ Bind(&retry);
4199   __ fprem();
4200 
4201   // Move FP status to AX.
4202   __ fstsw();
4203 
4204   // And see if the argument reduction is complete. This is signaled by the
4205   // C2 FPU flag bit set to 0.
4206   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4207   __ j(kNotEqual, &retry);
4208 
4209   // We have settled on the final value. Retrieve it into an XMM register.
4210   // Store FP top of stack to real stack.
4211   if (is_float) {
4212     __ fsts(Address(CpuRegister(RSP), 0));
4213   } else {
4214     __ fstl(Address(CpuRegister(RSP), 0));
4215   }
4216 
4217   // Pop the 2 items from the FP stack.
4218   __ fucompp();
4219 
4220   // Load the value from the stack into an XMM register.
4221   DCHECK(out.IsFpuRegister()) << out;
4222   if (is_float) {
4223     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4224   } else {
4225     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4226   }
4227 
4228   // And remove the temporary stack space we allocated.
4229   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4230 }
4231 
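// GenerateRemFP above produces the truncated-quotient floating-point remainder (C's fmod,
// Java's % on float/double). Illustrative sketch only, with a hypothetical helper name; it
// restates the idea rather than fprem's exact algorithm. fprem performs the reduction in
// bounded chunks, which is why the emitted code loops until the C2 status flag clears, and
// the sketch can differ in the last bit because its intermediate products are rounded.
static inline double TruncatedRemainderSketch(double dividend, double divisor) {
  double quotient = dividend / divisor;
  // Truncate the quotient toward zero (this sketch assumes |quotient| < 2^63).
  double truncated = static_cast<double>(static_cast<int64_t>(quotient));
  return dividend - truncated * divisor;
}
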
4232 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4233   DCHECK(instruction->IsDiv() || instruction->IsRem());
4234 
4235   LocationSummary* locations = instruction->GetLocations();
4236   Location second = locations->InAt(1);
4237   DCHECK(second.IsConstant());
4238 
4239   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4240   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4241   int64_t imm = Int64FromConstant(second.GetConstant());
4242 
4243   DCHECK(imm == 1 || imm == -1);
4244 
4245   switch (instruction->GetResultType()) {
4246     case DataType::Type::kInt32: {
4247       if (instruction->IsRem()) {
4248         __ xorl(output_register, output_register);
4249       } else {
4250         __ movl(output_register, input_register);
4251         if (imm == -1) {
4252           __ negl(output_register);
4253         }
4254       }
4255       break;
4256     }
4257 
4258     case DataType::Type::kInt64: {
4259       if (instruction->IsRem()) {
4260         __ xorl(output_register, output_register);
4261       } else {
4262         __ movq(output_register, input_register);
4263         if (imm == -1) {
4264           __ negq(output_register);
4265         }
4266       }
4267       break;
4268     }
4269 
4270     default:
4271       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4272   }
4273 }
4274 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4275   LocationSummary* locations = instruction->GetLocations();
4276   Location second = locations->InAt(1);
4277   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4278   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4279   int64_t imm = Int64FromConstant(second.GetConstant());
4280   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4281   uint64_t abs_imm = AbsOrMin(imm);
4282   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4283   if (instruction->GetResultType() == DataType::Type::kInt32) {
4284     NearLabel done;
4285     __ movl(out, numerator);
4286     __ andl(out, Immediate(abs_imm-1));
4287     __ j(Condition::kZero, &done);
4288     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4289     __ testl(numerator, numerator);
4290     __ cmov(Condition::kLess, out, tmp, false);
4291     __ Bind(&done);
4292 
4293   } else {
4294     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4295     codegen_->Load64BitValue(tmp, abs_imm - 1);
4296     NearLabel done;
4297 
4298     __ movq(out, numerator);
4299     __ andq(out, tmp);
4300     __ j(Condition::kZero, &done);
4301     __ movq(tmp, numerator);
4302     __ sarq(tmp, Immediate(63));
4303     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4304     __ orq(out, tmp);
4305     __ Bind(&done);
4306   }
4307 }
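
// RemByPowerOfTwo above uses the identity restated in this illustrative sketch (hypothetical
// helper, 32-bit path only): mask out the low bits, then, for a negative numerator with a
// non-zero masked value, pull the result back into the negative range so it matches Java's
// truncated remainder.
static inline int32_t RemByPowerOfTwoSketch(int32_t numerator, uint32_t abs_imm) {
  int32_t out = numerator & static_cast<int32_t>(abs_imm - 1u);        // movl + andl
  if (out != 0 && numerator < 0) {
    // leal tmp, [out + ~(abs_imm-1)] followed by cmov on kLess; unsigned arithmetic here
    // mirrors the hardware's wrap-around subtraction.
    out = static_cast<int32_t>(static_cast<uint32_t>(out) - abs_imm);
  }
  return out;
}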
4308 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4309   LocationSummary* locations = instruction->GetLocations();
4310   Location second = locations->InAt(1);
4311 
4312   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4313   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4314 
4315   int64_t imm = Int64FromConstant(second.GetConstant());
4316   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4317   uint64_t abs_imm = AbsOrMin(imm);
4318 
4319   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4320 
4321   if (instruction->GetResultType() == DataType::Type::kInt32) {
4322     // When the denominator is equal to 2, we can add the sign bit to the numerator in tmp.
4323     // Below we use an addl instruction instead of cmov, which gives us a 1-cycle benefit.
4324     if (abs_imm == 2) {
4325       __ leal(tmp, Address(numerator, 0));
4326       __ shrl(tmp, Immediate(31));
4327       __ addl(tmp, numerator);
4328     } else {
4329       __ leal(tmp, Address(numerator, abs_imm - 1));
4330       __ testl(numerator, numerator);
4331       __ cmov(kGreaterEqual, tmp, numerator);
4332     }
4333     int shift = CTZ(imm);
4334     __ sarl(tmp, Immediate(shift));
4335 
4336     if (imm < 0) {
4337       __ negl(tmp);
4338     }
4339 
4340     __ movl(output_register, tmp);
4341   } else {
4342     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4343     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4344     if (abs_imm == 2) {
4345       __ movq(rdx, numerator);
4346       __ shrq(rdx, Immediate(63));
4347       __ addq(rdx, numerator);
4348     } else {
4349       codegen_->Load64BitValue(rdx, abs_imm - 1);
4350       __ addq(rdx, numerator);
4351       __ testq(numerator, numerator);
4352       __ cmov(kGreaterEqual, rdx, numerator);
4353     }
4354     int shift = CTZ(imm);
4355     __ sarq(rdx, Immediate(shift));
4356 
4357     if (imm < 0) {
4358       __ negq(rdx);
4359     }
4360 
4361     __ movq(output_register, rdx);
4362   }
4363 }
4364 
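// DivByPowerOfTwo above biases a negative numerator by (abs_imm - 1) before the arithmetic
// shift so the division rounds toward zero, then negates for a negative divisor. Illustrative
// sketch of the 32-bit path only (hypothetical helper; `shift` corresponds to CTZ(imm), |imm|
// is assumed to be at least 2, and >> on a negative value is assumed to be an arithmetic shift):
static inline int32_t DivByPowerOfTwoSketch(int32_t numerator, int32_t imm, int shift) {
  uint32_t abs_imm = (imm < 0) ? -static_cast<uint32_t>(imm) : static_cast<uint32_t>(imm);
  int32_t tmp = numerator;
  if (numerator < 0) {
    tmp += static_cast<int32_t>(abs_imm - 1u);  // leal/addl bias; cmov keeps the original otherwise.
  }
  tmp >>= shift;                                // sarl
  return (imm < 0) ? -tmp : tmp;                // negl for negative divisors.
}
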
4365 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4366   DCHECK(instruction->IsDiv() || instruction->IsRem());
4367 
4368   LocationSummary* locations = instruction->GetLocations();
4369   Location second = locations->InAt(1);
4370 
4371   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4372       : locations->GetTemp(0).AsRegister<CpuRegister>();
4373   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4374   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4375       : locations->Out().AsRegister<CpuRegister>();
4376   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4377 
4378   DCHECK_EQ(RAX, eax.AsRegister());
4379   DCHECK_EQ(RDX, edx.AsRegister());
4380   if (instruction->IsDiv()) {
4381     DCHECK_EQ(RAX, out.AsRegister());
4382   } else {
4383     DCHECK_EQ(RDX, out.AsRegister());
4384   }
4385 
4386   int64_t magic;
4387   int shift;
4388 
4389   // TODO: can these branches be written as one?
4390   if (instruction->GetResultType() == DataType::Type::kInt32) {
4391     int imm = second.GetConstant()->AsIntConstant()->GetValue();
4392 
4393     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4394 
4395     __ movl(numerator, eax);
4396 
4397     __ movl(eax, Immediate(magic));
4398     __ imull(numerator);
4399 
4400     if (imm > 0 && magic < 0) {
4401       __ addl(edx, numerator);
4402     } else if (imm < 0 && magic > 0) {
4403       __ subl(edx, numerator);
4404     }
4405 
4406     if (shift != 0) {
4407       __ sarl(edx, Immediate(shift));
4408     }
4409 
4410     __ movl(eax, edx);
4411     __ shrl(edx, Immediate(31));
4412     __ addl(edx, eax);
4413 
4414     if (instruction->IsRem()) {
4415       __ movl(eax, numerator);
4416       __ imull(edx, Immediate(imm));
4417       __ subl(eax, edx);
4418       __ movl(edx, eax);
4419     } else {
4420       __ movl(eax, edx);
4421     }
4422   } else {
4423     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4424 
4425     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4426 
4427     CpuRegister rax = eax;
4428     CpuRegister rdx = edx;
4429 
4430     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4431 
4432     // Save the numerator.
4433     __ movq(numerator, rax);
4434 
4435     // RAX = magic
4436     codegen_->Load64BitValue(rax, magic);
4437 
4438     // RDX:RAX = magic * numerator
4439     __ imulq(numerator);
4440 
4441     if (imm > 0 && magic < 0) {
4442       // RDX += numerator
4443       __ addq(rdx, numerator);
4444     } else if (imm < 0 && magic > 0) {
4445       // RDX -= numerator
4446       __ subq(rdx, numerator);
4447     }
4448 
4449     // Shift if needed.
4450     if (shift != 0) {
4451       __ sarq(rdx, Immediate(shift));
4452     }
4453 
4454     // RDX += 1 if RDX < 0
4455     __ movq(rax, rdx);
4456     __ shrq(rdx, Immediate(63));
4457     __ addq(rdx, rax);
4458 
4459     if (instruction->IsRem()) {
4460       __ movq(rax, numerator);
4461 
4462       if (IsInt<32>(imm)) {
4463         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4464       } else {
4465         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4466       }
4467 
4468       __ subq(rax, rdx);
4469       __ movq(rdx, rax);
4470     } else {
4471       __ movq(rax, rdx);
4472     }
4473   }
4474 }
4475 
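// GenerateDivRemWithAnyConstant implements the classic "magic number" division: multiply by a
// precomputed constant, keep the high half, apply a correction, shift, and add the sign bit.
// Illustrative 32-bit sketch only (hypothetical helper; `magic` and `shift` are the values
// produced by CalculateMagicAndShiftForDivRem, and >> on a negative value is assumed to be an
// arithmetic shift):
static inline int32_t MagicDivSketch(int32_t numerator, int32_t imm, int32_t magic, int shift) {
  int64_t product = static_cast<int64_t>(magic) * static_cast<int64_t>(numerator);
  int32_t edx = static_cast<int32_t>(product >> 32);   // high half of imull
  if (imm > 0 && magic < 0) {
    edx += numerator;                                   // addl edx, numerator
  } else if (imm < 0 && magic > 0) {
    edx -= numerator;                                   // subl edx, numerator
  }
  edx >>= shift;                                        // sarl edx, shift
  // movl/shrl/addl: add 1 when the intermediate result is negative, rounding toward zero.
  int32_t quotient = edx + static_cast<int32_t>(static_cast<uint32_t>(edx) >> 31);
  return quotient;  // The remainder path then computes numerator - quotient * imm.
}
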
4476 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4477   DCHECK(instruction->IsDiv() || instruction->IsRem());
4478   DataType::Type type = instruction->GetResultType();
4479   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4480 
4481   bool is_div = instruction->IsDiv();
4482   LocationSummary* locations = instruction->GetLocations();
4483 
4484   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4485   Location second = locations->InAt(1);
4486 
4487   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4488   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4489 
4490   if (second.IsConstant()) {
4491     int64_t imm = Int64FromConstant(second.GetConstant());
4492 
4493     if (imm == 0) {
4494       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4495     } else if (imm == 1 || imm == -1) {
4496       DivRemOneOrMinusOne(instruction);
4497     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4498       if (is_div) {
4499         DivByPowerOfTwo(instruction->AsDiv());
4500       } else {
4501         RemByPowerOfTwo(instruction->AsRem());
4502       }
4503     } else {
4504       DCHECK(imm <= -2 || imm >= 2);
4505       GenerateDivRemWithAnyConstant(instruction);
4506     }
4507   } else {
4508     SlowPathCode* slow_path =
4509         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4510             instruction, out.AsRegister(), type, is_div);
4511     codegen_->AddSlowPath(slow_path);
4512 
4513     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4514     // 0x80000000(00000000) / -1 triggers an arithmetic exception!
4515     // Dividing by -1 is actually negation, and -0x80000000(00000000) = 0x80000000(00000000),
4516     // so it's safe to just use negl instead of more complex comparisons.
4517     if (type == DataType::Type::kInt32) {
4518       __ cmpl(second_reg, Immediate(-1));
4519       __ j(kEqual, slow_path->GetEntryLabel());
4520       // edx:eax <- sign extension of eax
4521       __ cdq();
4522       // eax = quotient, edx = remainder
4523       __ idivl(second_reg);
4524     } else {
4525       __ cmpq(second_reg, Immediate(-1));
4526       __ j(kEqual, slow_path->GetEntryLabel());
4527       // rdx:rax <- sign extension of rax
4528       __ cqo();
4529       // rax = quotient, rdx = remainder
4530       __ idivq(second_reg);
4531     }
4532     __ Bind(slow_path->GetExitLabel());
4533   }
4534 }
4535 
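// For a non-constant divisor, GenerateDivRemIntegral guards idiv with an explicit compare
// against -1: MIN_VALUE / -1 raises #DE in hardware, while Java defines the quotient as
// MIN_VALUE (and the remainder as 0). Illustrative 32-bit sketch of the semantics the slow
// path preserves (hypothetical helper):
static inline int32_t JavaDiv32Sketch(int32_t numerator, int32_t divisor) {
  if (divisor == -1) {
    // Negate via unsigned arithmetic so MIN_VALUE wraps back to MIN_VALUE without
    // signed-overflow UB in this sketch.
    return static_cast<int32_t>(0u - static_cast<uint32_t>(numerator));
  }
  return numerator / divisor;  // idivl; the earlier DivZeroCheck rules out divisor == 0.
}
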
4536 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4537   LocationSummary* locations =
4538       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4539   switch (div->GetResultType()) {
4540     case DataType::Type::kInt32:
4541     case DataType::Type::kInt64: {
4542       locations->SetInAt(0, Location::RegisterLocation(RAX));
4543       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4544       locations->SetOut(Location::SameAsFirstInput());
4545       // Intel uses edx:eax (rdx:rax for Int64) as the dividend.
4546       locations->AddTemp(Location::RegisterLocation(RDX));
4547       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4548       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4549       // output and request another temp.
4550       if (div->InputAt(1)->IsConstant()) {
4551         locations->AddTemp(Location::RequiresRegister());
4552       }
4553       break;
4554     }
4555 
4556     case DataType::Type::kFloat32:
4557     case DataType::Type::kFloat64: {
4558       locations->SetInAt(0, Location::RequiresFpuRegister());
4559       locations->SetInAt(1, Location::Any());
4560       locations->SetOut(Location::SameAsFirstInput());
4561       break;
4562     }
4563 
4564     default:
4565       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4566   }
4567 }
4568 
4569 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4570   LocationSummary* locations = div->GetLocations();
4571   Location first = locations->InAt(0);
4572   Location second = locations->InAt(1);
4573   DCHECK(first.Equals(locations->Out()));
4574 
4575   DataType::Type type = div->GetResultType();
4576   switch (type) {
4577     case DataType::Type::kInt32:
4578     case DataType::Type::kInt64: {
4579       GenerateDivRemIntegral(div);
4580       break;
4581     }
4582 
4583     case DataType::Type::kFloat32: {
4584       if (second.IsFpuRegister()) {
4585         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4586       } else if (second.IsConstant()) {
4587         __ divss(first.AsFpuRegister<XmmRegister>(),
4588                  codegen_->LiteralFloatAddress(
4589                      second.GetConstant()->AsFloatConstant()->GetValue()));
4590       } else {
4591         DCHECK(second.IsStackSlot());
4592         __ divss(first.AsFpuRegister<XmmRegister>(),
4593                  Address(CpuRegister(RSP), second.GetStackIndex()));
4594       }
4595       break;
4596     }
4597 
4598     case DataType::Type::kFloat64: {
4599       if (second.IsFpuRegister()) {
4600         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4601       } else if (second.IsConstant()) {
4602         __ divsd(first.AsFpuRegister<XmmRegister>(),
4603                  codegen_->LiteralDoubleAddress(
4604                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4605       } else {
4606         DCHECK(second.IsDoubleStackSlot());
4607         __ divsd(first.AsFpuRegister<XmmRegister>(),
4608                  Address(CpuRegister(RSP), second.GetStackIndex()));
4609       }
4610       break;
4611     }
4612 
4613     default:
4614       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4615   }
4616 }
4617 
4618 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4619   DataType::Type type = rem->GetResultType();
4620   LocationSummary* locations =
4621     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4622 
4623   switch (type) {
4624     case DataType::Type::kInt32:
4625     case DataType::Type::kInt64: {
4626       locations->SetInAt(0, Location::RegisterLocation(RAX));
4627       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4628       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4629       locations->SetOut(Location::RegisterLocation(RDX));
4630       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4631       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4632       // output and request another temp.
4633       if (rem->InputAt(1)->IsConstant()) {
4634         locations->AddTemp(Location::RequiresRegister());
4635       }
4636       break;
4637     }
4638 
4639     case DataType::Type::kFloat32:
4640     case DataType::Type::kFloat64: {
4641       locations->SetInAt(0, Location::Any());
4642       locations->SetInAt(1, Location::Any());
4643       locations->SetOut(Location::RequiresFpuRegister());
4644       locations->AddTemp(Location::RegisterLocation(RAX));
4645       break;
4646     }
4647 
4648     default:
4649       LOG(FATAL) << "Unexpected rem type " << type;
4650   }
4651 }
4652 
4653 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4654   DataType::Type type = rem->GetResultType();
4655   switch (type) {
4656     case DataType::Type::kInt32:
4657     case DataType::Type::kInt64: {
4658       GenerateDivRemIntegral(rem);
4659       break;
4660     }
4661     case DataType::Type::kFloat32:
4662     case DataType::Type::kFloat64: {
4663       GenerateRemFP(rem);
4664       break;
4665     }
4666     default:
4667       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4668   }
4669 }
4670 
4671 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4672   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4673   switch (minmax->GetResultType()) {
4674     case DataType::Type::kInt32:
4675     case DataType::Type::kInt64:
4676       locations->SetInAt(0, Location::RequiresRegister());
4677       locations->SetInAt(1, Location::RequiresRegister());
4678       locations->SetOut(Location::SameAsFirstInput());
4679       break;
4680     case DataType::Type::kFloat32:
4681     case DataType::Type::kFloat64:
4682       locations->SetInAt(0, Location::RequiresFpuRegister());
4683       locations->SetInAt(1, Location::RequiresFpuRegister());
4684       // The following is sub-optimal, but it is all we can do for now. It would be fine to also
4685       // accept the second input as the output (we could simply swap the inputs).
4686       locations->SetOut(Location::SameAsFirstInput());
4687       break;
4688     default:
4689       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4690   }
4691 }
4692 
4693 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4694                                                        bool is_min,
4695                                                        DataType::Type type) {
4696   Location op1_loc = locations->InAt(0);
4697   Location op2_loc = locations->InAt(1);
4698 
4699   // Shortcut for same input locations.
4700   if (op1_loc.Equals(op2_loc)) {
4701     // Can return immediately, as op1_loc == out_loc.
4702     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4703     //       a copy here.
4704     DCHECK(locations->Out().Equals(op1_loc));
4705     return;
4706   }
4707 
4708   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4709   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4710 
4711   //  (out := op1)
4712   //  out <=? op2
4713   //  if out is min jmp done
4714   //  out := op2
4715   // done:
4716 
4717   if (type == DataType::Type::kInt64) {
4718     __ cmpq(out, op2);
4719     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4720   } else {
4721     DCHECK_EQ(type, DataType::Type::kInt32);
4722     __ cmpl(out, op2);
4723     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4724   }
4725 }
4726 
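// GenerateMinMaxInt is a branchless compare + cmov: the output already holds op1 and is
// conditionally replaced by op2. Equivalent scalar sketch (hypothetical helper, 32-bit case):
static inline int32_t MinMaxIntSketch(int32_t op1, int32_t op2, bool is_min) {
  int32_t out = op1;                              // out := op1 (out and op1 share a location)
  if (is_min ? (out > op2) : (out < op2)) {       // cmov on kGreater / kLess
    out = op2;
  }
  return out;
}
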
4727 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4728                                                       bool is_min,
4729                                                       DataType::Type type) {
4730   Location op1_loc = locations->InAt(0);
4731   Location op2_loc = locations->InAt(1);
4732   Location out_loc = locations->Out();
4733   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4734 
4735   // Shortcut for same input locations.
4736   if (op1_loc.Equals(op2_loc)) {
4737     DCHECK(out_loc.Equals(op1_loc));
4738     return;
4739   }
4740 
4741   //  (out := op1)
4742   //  out <=? op2
4743   //  if NaN jmp nan_label
4744   //  if out is min jmp done
4745   //  if op2 is min jmp op2_label
4746   //  handle -0/+0
4747   //  jmp done
4748   // nan_label:
4749   //  out := NaN
4750   // op2_label:
4751   //  out := op2
4752   // done:
4753   //
4754   // This removes one jmp, but needs to copy one input (op1) to out.
4755   //
4756   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4757 
4758   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4759 
4760   NearLabel nan, done, op2_label;
4761   if (type == DataType::Type::kFloat64) {
4762     __ ucomisd(out, op2);
4763   } else {
4764     DCHECK_EQ(type, DataType::Type::kFloat32);
4765     __ ucomiss(out, op2);
4766   }
4767 
4768   __ j(Condition::kParityEven, &nan);
4769 
4770   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4771   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4772 
4773   // Handle 0.0/-0.0.
4774   if (is_min) {
4775     if (type == DataType::Type::kFloat64) {
4776       __ orpd(out, op2);
4777     } else {
4778       __ orps(out, op2);
4779     }
4780   } else {
4781     if (type == DataType::Type::kFloat64) {
4782       __ andpd(out, op2);
4783     } else {
4784       __ andps(out, op2);
4785     }
4786   }
4787   __ jmp(&done);
4788 
4789   // NaN handling.
4790   __ Bind(&nan);
4791   if (type == DataType::Type::kFloat64) {
4792     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4793   } else {
4794     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4795   }
4796   __ jmp(&done);
4797 
4798   // out := op2;
4799   __ Bind(&op2_label);
4800   if (type == DataType::Type::kFloat64) {
4801     __ movsd(out, op2);
4802   } else {
4803     __ movss(out, op2);
4804   }
4805 
4806   // Done.
4807   __ Bind(&done);
4808 }
4809 
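// GenerateMinMaxFP must treat NaN and the signed zeros specially: any NaN input yields the
// canonical NaN, and for equal operands (+0.0 vs -0.0) the raw bit patterns are OR-ed for min
// or AND-ed for max. Illustrative double-precision min sketch only (hypothetical helper;
// assumes memcpy is available, e.g. via <cstring>):
static inline double MinDoubleSketch(double op1, double op2) {
  if (op1 != op1 || op2 != op2) {                 // ucomisd sets PF on an unordered compare.
    uint64_t canonical_nan = UINT64_C(0x7FF8000000000000);
    double nan;
    memcpy(&nan, &canonical_nan, sizeof(nan));
    return nan;
  }
  if (op1 < op2) return op1;                      // out already holds the minimum.
  if (op2 < op1) return op2;                      // op2_label: out := op2.
  uint64_t bits1, bits2;                          // op1 == op2: merge the sign bits (orpd).
  memcpy(&bits1, &op1, sizeof(bits1));
  memcpy(&bits2, &op2, sizeof(bits2));
  uint64_t merged = bits1 | bits2;                // Picks -0.0 when the signs differ.
  double out;
  memcpy(&out, &merged, sizeof(out));
  return out;
}
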
4810 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4811   DataType::Type type = minmax->GetResultType();
4812   switch (type) {
4813     case DataType::Type::kInt32:
4814     case DataType::Type::kInt64:
4815       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4816       break;
4817     case DataType::Type::kFloat32:
4818     case DataType::Type::kFloat64:
4819       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4820       break;
4821     default:
4822       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4823   }
4824 }
4825 
4826 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4827   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4828 }
4829 
4830 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4831   GenerateMinMax(min, /*is_min*/ true);
4832 }
4833 
4834 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4835   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4836 }
4837 
4838 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4839   GenerateMinMax(max, /*is_min*/ false);
4840 }
4841 
4842 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4843   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4844   switch (abs->GetResultType()) {
4845     case DataType::Type::kInt32:
4846     case DataType::Type::kInt64:
4847       locations->SetInAt(0, Location::RequiresRegister());
4848       locations->SetOut(Location::SameAsFirstInput());
4849       locations->AddTemp(Location::RequiresRegister());
4850       break;
4851     case DataType::Type::kFloat32:
4852     case DataType::Type::kFloat64:
4853       locations->SetInAt(0, Location::RequiresFpuRegister());
4854       locations->SetOut(Location::SameAsFirstInput());
4855       locations->AddTemp(Location::RequiresFpuRegister());
4856       break;
4857     default:
4858       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4859   }
4860 }
4861 
4862 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4863   LocationSummary* locations = abs->GetLocations();
4864   switch (abs->GetResultType()) {
4865     case DataType::Type::kInt32: {
4866       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4867       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4868       // Create mask.
4869       __ movl(mask, out);
4870       __ sarl(mask, Immediate(31));
4871       // Add mask.
4872       __ addl(out, mask);
4873       __ xorl(out, mask);
4874       break;
4875     }
4876     case DataType::Type::kInt64: {
4877       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4878       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4879       // Create mask.
4880       __ movq(mask, out);
4881       __ sarq(mask, Immediate(63));
4882       // Add mask.
4883       __ addq(out, mask);
4884       __ xorq(out, mask);
4885       break;
4886     }
4887     case DataType::Type::kFloat32: {
4888       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4889       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4890       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4891       __ andps(out, mask);
4892       break;
4893     }
4894     case DataType::Type::kFloat64: {
4895       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4896       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4897       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4898       __ andpd(out, mask);
4899       break;
4900     }
4901     default:
4902       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4903   }
4904 }
4905 
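// The integer HAbs above is the classic branchless sign-mask trick, while the floating-point
// variants simply clear the sign bit with an AND mask. Illustrative 32-bit sketch of the
// integer path (hypothetical helper; like Math.abs, MIN_VALUE wraps back to MIN_VALUE):
static inline int32_t AbsInt32Sketch(int32_t value) {
  uint32_t mask = static_cast<uint32_t>(value >> 31);                // sarl: 0 or 0xFFFFFFFF.
  uint32_t result = (static_cast<uint32_t>(value) + mask) ^ mask;    // addl + xorl, wrap-around.
  return static_cast<int32_t>(result);                               // Identity or negation.
}
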
4906 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4907   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4908   locations->SetInAt(0, Location::Any());
4909 }
4910 
4911 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4912   SlowPathCode* slow_path =
4913       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4914   codegen_->AddSlowPath(slow_path);
4915 
4916   LocationSummary* locations = instruction->GetLocations();
4917   Location value = locations->InAt(0);
4918 
4919   switch (instruction->GetType()) {
4920     case DataType::Type::kBool:
4921     case DataType::Type::kUint8:
4922     case DataType::Type::kInt8:
4923     case DataType::Type::kUint16:
4924     case DataType::Type::kInt16:
4925     case DataType::Type::kInt32: {
4926       if (value.IsRegister()) {
4927         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4928         __ j(kEqual, slow_path->GetEntryLabel());
4929       } else if (value.IsStackSlot()) {
4930         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4931         __ j(kEqual, slow_path->GetEntryLabel());
4932       } else {
4933         DCHECK(value.IsConstant()) << value;
4934         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4935           __ jmp(slow_path->GetEntryLabel());
4936         }
4937       }
4938       break;
4939     }
4940     case DataType::Type::kInt64: {
4941       if (value.IsRegister()) {
4942         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4943         __ j(kEqual, slow_path->GetEntryLabel());
4944       } else if (value.IsDoubleStackSlot()) {
4945         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4946         __ j(kEqual, slow_path->GetEntryLabel());
4947       } else {
4948         DCHECK(value.IsConstant()) << value;
4949         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4950           __ jmp(slow_path->GetEntryLabel());
4951         }
4952       }
4953       break;
4954     }
4955     default:
4956       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4957   }
4958 }
4959 
4960 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4961   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4962 
4963   LocationSummary* locations =
4964       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4965 
4966   switch (op->GetResultType()) {
4967     case DataType::Type::kInt32:
4968     case DataType::Type::kInt64: {
4969       locations->SetInAt(0, Location::RequiresRegister());
4970       // The shift count needs to be in CL (unless it is a constant).
4971       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4972       locations->SetOut(Location::SameAsFirstInput());
4973       break;
4974     }
4975     default:
4976       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4977   }
4978 }
4979 
4980 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4981   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4982 
4983   LocationSummary* locations = op->GetLocations();
4984   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4985   Location second = locations->InAt(1);
4986 
4987   switch (op->GetResultType()) {
4988     case DataType::Type::kInt32: {
4989       if (second.IsRegister()) {
4990         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4991         if (op->IsShl()) {
4992           __ shll(first_reg, second_reg);
4993         } else if (op->IsShr()) {
4994           __ sarl(first_reg, second_reg);
4995         } else {
4996           __ shrl(first_reg, second_reg);
4997         }
4998       } else {
4999         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
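             // Java only uses the low 5 bits of the shift count for 32-bit shifts (low 6 bits for
             // the 64-bit case below), hence the masking with kMaxIntShiftDistance. x86 applies
             // the same masking to register shift counts in hardware, so the register case above
             // needs no extra code.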
5000         if (op->IsShl()) {
5001           __ shll(first_reg, imm);
5002         } else if (op->IsShr()) {
5003           __ sarl(first_reg, imm);
5004         } else {
5005           __ shrl(first_reg, imm);
5006         }
5007       }
5008       break;
5009     }
5010     case DataType::Type::kInt64: {
5011       if (second.IsRegister()) {
5012         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5013         if (op->IsShl()) {
5014           __ shlq(first_reg, second_reg);
5015         } else if (op->IsShr()) {
5016           __ sarq(first_reg, second_reg);
5017         } else {
5018           __ shrq(first_reg, second_reg);
5019         }
5020       } else {
5021         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5022         if (op->IsShl()) {
5023           __ shlq(first_reg, imm);
5024         } else if (op->IsShr()) {
5025           __ sarq(first_reg, imm);
5026         } else {
5027           __ shrq(first_reg, imm);
5028         }
5029       }
5030       break;
5031     }
5032     default:
5033       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5034       UNREACHABLE();
5035   }
5036 }
5037 
5038 void LocationsBuilderX86_64::HandleRotate(HBinaryOperation* rotate) {
5039   LocationSummary* locations =
5040       new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5041 
5042   switch (rotate->GetResultType()) {
5043     case DataType::Type::kInt32:
5044     case DataType::Type::kInt64: {
5045       locations->SetInAt(0, Location::RequiresRegister());
5046       // The shift count needs to be in CL (unless it is a constant).
5047       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, rotate->InputAt(1)));
5048       locations->SetOut(Location::SameAsFirstInput());
5049       break;
5050     }
5051     default:
5052       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5053       UNREACHABLE();
5054   }
5055 }
5056 
5057 void InstructionCodeGeneratorX86_64::HandleRotate(HBinaryOperation* rotate) {
5058   LocationSummary* locations = rotate->GetLocations();
5059   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5060   Location second = locations->InAt(1);
5061 
5062   switch (rotate->GetResultType()) {
5063     case DataType::Type::kInt32:
5064       if (second.IsRegister()) {
5065         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5066         if (rotate->IsRor()) {
5067           __ rorl(first_reg, second_reg);
5068         } else {
5069           DCHECK(rotate->IsRol());
5070           __ roll(first_reg, second_reg);
5071         }
5072       } else {
5073         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5074         if (rotate->IsRor()) {
5075           __ rorl(first_reg, imm);
5076         } else {
5077           DCHECK(rotate->IsRol());
5078           __ roll(first_reg, imm);
5079         }
5080       }
5081       break;
5082     case DataType::Type::kInt64:
5083       if (second.IsRegister()) {
5084         CpuRegister second_reg = second.AsRegister<CpuRegister>();
5085         if (rotate->IsRor()) {
5086           __ rorq(first_reg, second_reg);
5087         } else {
5088           DCHECK(rotate->IsRol());
5089           __ rolq(first_reg, second_reg);
5090         }
5091       } else {
5092         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5093         if (rotate->IsRor()) {
5094           __ rorq(first_reg, imm);
5095         } else {
5096           DCHECK(rotate->IsRol());
5097           __ rolq(first_reg, imm);
5098         }
5099       }
5100       break;
5101     default:
5102       LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5103       UNREACHABLE();
5104   }
5105 }
5106 
5107 void LocationsBuilderX86_64::VisitRol(HRol* rol) {
5108   HandleRotate(rol);
5109 }
5110 
5111 void InstructionCodeGeneratorX86_64::VisitRol(HRol* rol) {
5112   HandleRotate(rol);
5113 }
5114 
5115 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5116   HandleRotate(ror);
5117 }
5118 
5119 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5120   HandleRotate(ror);
5121 }
5122 
5123 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5124   HandleShift(shl);
5125 }
5126 
5127 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5128   HandleShift(shl);
5129 }
5130 
5131 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5132   HandleShift(shr);
5133 }
5134 
5135 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5136   HandleShift(shr);
5137 }
5138 
5139 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5140   HandleShift(ushr);
5141 }
5142 
5143 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5144   HandleShift(ushr);
5145 }
5146 
5147 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5148   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5149       instruction, LocationSummary::kCallOnMainOnly);
5150   InvokeRuntimeCallingConvention calling_convention;
5151   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5152   locations->SetOut(Location::RegisterLocation(RAX));
5153 }
5154 
5155 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5156   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction);
5157   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5158   DCHECK(!codegen_->IsLeafMethod());
5159 }
5160 
5161 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5162   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5163       instruction, LocationSummary::kCallOnMainOnly);
5164   InvokeRuntimeCallingConvention calling_convention;
5165   locations->SetOut(Location::RegisterLocation(RAX));
5166   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5167   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5168 }
5169 
5170 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5171   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5172   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5173   codegen_->InvokeRuntime(entrypoint, instruction);
5174   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5175   DCHECK(!codegen_->IsLeafMethod());
5176 }
5177 
5178 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5179   LocationSummary* locations =
5180       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5181   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5182   if (location.IsStackSlot()) {
5183     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5184   } else if (location.IsDoubleStackSlot()) {
5185     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5186   }
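       // Stack-passed parameters live in the caller's frame, above this method's frame, so the
       // calling-convention slot index is rebased by the current frame size to obtain an offset
       // that is valid relative to this method's stack pointer.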
5187   locations->SetOut(location);
5188 }
5189 
5190 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5191     [[maybe_unused]] HParameterValue* instruction) {
5192   // Nothing to do, the parameter is already at its location.
5193 }
5194 
5195 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5196   LocationSummary* locations =
5197       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5198   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5199 }
5200 
5201 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5202     [[maybe_unused]] HCurrentMethod* instruction) {
5203   // Nothing to do, the method is already at its location.
5204 }
5205 
5206 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5207   LocationSummary* locations =
5208       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5209   locations->SetInAt(0, Location::RequiresRegister());
5210   locations->SetOut(Location::RequiresRegister());
5211 }
5212 
5213 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5214   LocationSummary* locations = instruction->GetLocations();
5215   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5216     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5217         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5218     __ movq(locations->Out().AsRegister<CpuRegister>(),
5219             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5220   } else {
5221     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5222         instruction->GetIndex(), kX86_64PointerSize));
5223     __ movq(locations->Out().AsRegister<CpuRegister>(),
5224             Address(locations->InAt(0).AsRegister<CpuRegister>(),
5225             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5226     __ movq(locations->Out().AsRegister<CpuRegister>(),
5227             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
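         // The IMT case needs two dependent loads: the first fetches the ImTable pointer stored
         // in the class object, the second indexes into that table for the ArtMethod*. The vtable
         // case above needs a single load because the vtable is embedded in the class object.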
5228   }
5229 }
5230 
5231 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5232   LocationSummary* locations =
5233       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5234   locations->SetInAt(0, Location::RequiresRegister());
5235   locations->SetOut(Location::SameAsFirstInput());
5236 }
5237 
5238 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5239   LocationSummary* locations = not_->GetLocations();
5240   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5241             locations->Out().AsRegister<CpuRegister>().AsRegister());
5242   Location out = locations->Out();
5243   switch (not_->GetResultType()) {
5244     case DataType::Type::kInt32:
5245       __ notl(out.AsRegister<CpuRegister>());
5246       break;
5247 
5248     case DataType::Type::kInt64:
5249       __ notq(out.AsRegister<CpuRegister>());
5250       break;
5251 
5252     default:
5253       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5254   }
5255 }
5256 
5257 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5258   LocationSummary* locations =
5259       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5260   locations->SetInAt(0, Location::RequiresRegister());
5261   locations->SetOut(Location::SameAsFirstInput());
5262 }
5263 
5264 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5265   LocationSummary* locations = bool_not->GetLocations();
5266   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5267             locations->Out().AsRegister<CpuRegister>().AsRegister());
5268   Location out = locations->Out();
5269   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5270 }
5271 
5272 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5273   LocationSummary* locations =
5274       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5275   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5276     locations->SetInAt(i, Location::Any());
5277   }
5278   locations->SetOut(Location::Any());
5279 }
5280 
5281 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5282   LOG(FATAL) << "Unimplemented";
5283 }
5284 
5285 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5286   /*
5287    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence.
5288    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5289    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5290    */
5291   switch (kind) {
5292     case MemBarrierKind::kAnyAny: {
5293       MemoryFence();
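           // MemoryFence() is expected to emit a full StoreLoad barrier, e.g. MFENCE or a locked
           // read-modify-write such as `lock addl $0, (%rsp)`; either idiom is sufficient on
           // x86-64. The exact instruction choice is left to the assembler helper, not this switch.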
5294       break;
5295     }
5296     case MemBarrierKind::kAnyStore:
5297     case MemBarrierKind::kLoadAny:
5298     case MemBarrierKind::kStoreStore: {
5299       // nop
5300       break;
5301     }
5302     case MemBarrierKind::kNTStoreStore:
5303       // Non-Temporal Store/Store needs an explicit fence.
5304       MemoryFence(/* non-temporal= */ true);
5305       break;
5306   }
5307 }
5308 
5309 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5310   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5311 
5312   bool object_field_get_with_read_barrier =
5313       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5314   LocationSummary* locations =
5315       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5316                                                        object_field_get_with_read_barrier
5317                                                            ? LocationSummary::kCallOnSlowPath
5318                                                            : LocationSummary::kNoCall);
5319   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5320     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5321   }
5322   // receiver_input
5323   locations->SetInAt(0, Location::RequiresRegister());
5324   if (DataType::IsFloatingPointType(instruction->GetType())) {
5325     locations->SetOut(Location::RequiresFpuRegister());
5326   } else {
5327     // The output overlaps for an object field get when read barriers are
5328     // enabled: we do not want the move to overwrite the object's location, as
5329     // we need it to emit the read barrier. For predicated instructions we can
5330       // always overlap, since the output is SameAsFirst and already holds the default value.
5331     locations->SetOut(
5332         Location::RequiresRegister(),
5333         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5334   }
5335 }
5336 
5337 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5338                                                     const FieldInfo& field_info) {
5339   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5340 
5341   LocationSummary* locations = instruction->GetLocations();
5342   Location base_loc = locations->InAt(0);
5343   CpuRegister base = base_loc.AsRegister<CpuRegister>();
5344   Location out = locations->Out();
5345   bool is_volatile = field_info.IsVolatile();
5346   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5347   DataType::Type load_type = instruction->GetType();
5348   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5349 
5350   if (load_type == DataType::Type::kReference) {
5351     // /* HeapReference<Object> */ out = *(base + offset)
5352     if (codegen_->EmitBakerReadBarrier()) {
5353       // Note that a potential implicit null check is handled in this
5354       // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5355       codegen_->GenerateFieldLoadWithBakerReadBarrier(
5356           instruction, out, base, offset, /* needs_null_check= */ true);
5357       if (is_volatile) {
5358         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5359       }
5360     } else {
5361       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5362       codegen_->MaybeRecordImplicitNullCheck(instruction);
5363       if (is_volatile) {
5364         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5365       }
5366       // If read barriers are enabled, emit read barriers other than
5367       // Baker's using a slow path (and also unpoison the loaded
5368       // reference, if heap poisoning is enabled).
5369       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5370     }
5371   } else {
5372     codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5373     codegen_->MaybeRecordImplicitNullCheck(instruction);
5374     if (is_volatile) {
5375       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5376     }
5377   }
5378 }
5379 
5380 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5381                                             const FieldInfo& field_info,
5382                                             WriteBarrierKind write_barrier_kind) {
5383   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5384 
5385   LocationSummary* locations =
5386       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5387   DataType::Type field_type = field_info.GetFieldType();
5388   bool is_volatile = field_info.IsVolatile();
5389   bool needs_write_barrier =
5390       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5391   bool check_gc_card =
5392       codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5393 
5394   locations->SetInAt(0, Location::RequiresRegister());
5395   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5396     if (is_volatile) {
5397       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5398       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5399     } else {
5400       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5401     }
5402   } else {
5403     if (is_volatile) {
5404       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5405       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5406     } else {
5407       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5408     }
5409   }
5410 
5411   // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5412   // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5413   if (needs_write_barrier ||
5414       check_gc_card ||
5415       (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5416     // Temporary registers for the write barrier / reference poisoning.
5417     locations->AddRegisterTemps(2);
5418   }
5419 }
5420 
5421 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5422                                            DataType::Type type,
5423                                            CpuRegister* temp) {
5424   switch (type) {
5425     case DataType::Type::kInt16:
5426       // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5427       __ bswapl(value.AsRegister<CpuRegister>());
5428       __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
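           // bswapl reverses all four bytes of the 32-bit register, leaving the swapped 16-bit
           // value in the upper half; the arithmetic shift right by 16 moves it back down while
           // sign-extending, as a signed Int16 result requires. Hypothetical example:
           //   0x00001234 -> bswap -> 0x34120000 -> sar 16 -> 0x00003412.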
5429       break;
5430     case DataType::Type::kUint16:
5431       // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5432       __ bswapl(value.AsRegister<CpuRegister>());
5433       __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5434       break;
5435     case DataType::Type::kInt32:
5436     case DataType::Type::kUint32:
5437       __ bswapl(value.AsRegister<CpuRegister>());
5438       break;
5439     case DataType::Type::kInt64:
5440     case DataType::Type::kUint64:
5441       __ bswapq(value.AsRegister<CpuRegister>());
5442       break;
5443     case DataType::Type::kFloat32: {
5444       DCHECK_NE(temp, nullptr);
5445       __ movd(*temp, value.AsFpuRegister<XmmRegister>());
5446       __ bswapl(*temp);
5447       __ movd(value.AsFpuRegister<XmmRegister>(), *temp);
5448       break;
5449     }
5450     case DataType::Type::kFloat64: {
5451       DCHECK_NE(temp, nullptr);
5452       __ movq(*temp, value.AsFpuRegister<XmmRegister>());
5453       __ bswapq(*temp);
5454       __ movq(value.AsFpuRegister<XmmRegister>(), *temp);
5455       break;
5456     }
5457     default:
5458       LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5459       UNREACHABLE();
5460   }
5461 }
5462 
5463 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5464                                                     uint32_t value_index,
5465                                                     uint32_t extra_temp_index,
5466                                                     DataType::Type field_type,
5467                                                     Address field_addr,
5468                                                     CpuRegister base,
5469                                                     bool is_volatile,
5470                                                     bool is_atomic,
5471                                                     bool value_can_be_null,
5472                                                     bool byte_swap,
5473                                                     WriteBarrierKind write_barrier_kind) {
5474   LocationSummary* locations = instruction->GetLocations();
5475   Location value = locations->InAt(value_index);
5476 
5477   if (is_volatile) {
5478     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5479   }
5480 
5481   bool maybe_record_implicit_null_check_done = false;
5482 
5483   if (value.IsConstant()) {
5484     switch (field_type) {
5485       case DataType::Type::kBool:
5486       case DataType::Type::kUint8:
5487       case DataType::Type::kInt8:
5488         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5489         break;
5490       case DataType::Type::kUint16:
5491       case DataType::Type::kInt16: {
5492         int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5493         if (byte_swap) {
5494           v = BSWAP(v);
5495         }
5496         __ movw(field_addr, Immediate(v));
5497         break;
5498       }
5499       case DataType::Type::kUint32:
5500       case DataType::Type::kInt32:
5501       case DataType::Type::kFloat32:
5502       case DataType::Type::kReference: {
5503         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5504         if (byte_swap) {
5505           v = BSWAP(v);
5506         }
5507         DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5508         // Note: if heap poisoning is enabled, no need to poison
5509         // (negate) `v` if it is a reference, as it would be null.
5510         __ movl(field_addr, Immediate(v));
5511         break;
5512       }
5513       case DataType::Type::kUint64:
5514       case DataType::Type::kInt64:
5515       case DataType::Type::kFloat64: {
5516         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5517         if (byte_swap) {
5518           v = BSWAP(v);
5519         }
5520         if (is_atomic) {
5521           // Move constant into a register, then atomically store the register to memory.
5522           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5523           __ movq(temp, Immediate(v));
5524           __ movq(field_addr, temp);
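               // x86-64 has no store instruction with a full 64-bit immediate, so the constant is
               // materialized in a register first; the single register-to-memory movq is then
               // atomic for a naturally aligned field, which is what `is_atomic` requires.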
5525         } else {
5526           Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5527           codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5528         }
5529         maybe_record_implicit_null_check_done = true;
5530         break;
5531       }
5532       case DataType::Type::kVoid:
5533         LOG(FATAL) << "Unreachable type " << field_type;
5534         UNREACHABLE();
5535     }
5536   } else {
5537     if (byte_swap) {
5538       // Swap byte order in-place in the input register (we will restore it later).
5539       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5540       Bswap(value, field_type, &temp);
5541     }
5542 
5543     switch (field_type) {
5544       case DataType::Type::kBool:
5545       case DataType::Type::kUint8:
5546       case DataType::Type::kInt8:
5547         __ movb(field_addr, value.AsRegister<CpuRegister>());
5548         break;
5549       case DataType::Type::kUint16:
5550       case DataType::Type::kInt16:
5551         __ movw(field_addr, value.AsRegister<CpuRegister>());
5552         break;
5553       case DataType::Type::kUint32:
5554       case DataType::Type::kInt32:
5555       case DataType::Type::kReference:
5556         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5557           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5558           __ movl(temp, value.AsRegister<CpuRegister>());
5559           __ PoisonHeapReference(temp);
5560           __ movl(field_addr, temp);
5561         } else {
5562           __ movl(field_addr, value.AsRegister<CpuRegister>());
5563         }
5564         break;
5565       case DataType::Type::kUint64:
5566       case DataType::Type::kInt64:
5567         __ movq(field_addr, value.AsRegister<CpuRegister>());
5568         break;
5569       case DataType::Type::kFloat32:
5570         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5571         break;
5572       case DataType::Type::kFloat64:
5573         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5574         break;
5575       case DataType::Type::kVoid:
5576         LOG(FATAL) << "Unreachable type " << field_type;
5577         UNREACHABLE();
5578     }
5579 
5580     if (byte_swap) {
5581       // Restore byte order.
5582       CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5583       Bswap(value, field_type, &temp);
5584     }
5585   }
5586 
5587   if (!maybe_record_implicit_null_check_done) {
5588     codegen_->MaybeRecordImplicitNullCheck(instruction);
5589   }
5590 
5591   bool needs_write_barrier =
5592       codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5593   if (needs_write_barrier) {
5594     if (value.IsConstant()) {
5595       DCHECK(value.GetConstant()->IsNullConstant());
5596       if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5597         DCHECK_NE(extra_temp_index, 0u);
5598         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5599         CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5600         codegen_->MarkGCCard(temp, card, base);
5601       }
5602     } else {
5603       DCHECK_NE(extra_temp_index, 0u);
5604       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5605       CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5606       codegen_->MaybeMarkGCCard(
5607           temp,
5608           card,
5609           base,
5610           value.AsRegister<CpuRegister>(),
5611           value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
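           // MaybeMarkGCCard dirties the card-table entry covering `base` so the GC rescans the
           // object; per its last argument it may skip the card write when the stored value is
           // null and the write barrier is not being relied on.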
5612     }
5613   } else if (codegen_->ShouldCheckGCCard(
5614                  field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5615     DCHECK_NE(extra_temp_index, 0u);
5616     DCHECK(value.IsRegister());
5617     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5618     CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5619     codegen_->CheckGCCardIsValid(temp, card, base);
5620   }
5621 
5622   if (is_volatile) {
5623     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5624   }
5625 }
5626 
5627 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5628                                                     const FieldInfo& field_info,
5629                                                     bool value_can_be_null,
5630                                                     WriteBarrierKind write_barrier_kind) {
5631   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5632 
5633   LocationSummary* locations = instruction->GetLocations();
5634   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5635   bool is_volatile = field_info.IsVolatile();
5636   DataType::Type field_type = field_info.GetFieldType();
5637   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5638 
5639   HandleFieldSet(instruction,
5640                  /*value_index=*/ 1,
5641                  /*extra_temp_index=*/ 1,
5642                  field_type,
5643                  Address(base, offset),
5644                  base,
5645                  is_volatile,
5646                  /*is_atomic=*/ false,
5647                  value_can_be_null,
5648                  /*byte_swap=*/ false,
5649                  write_barrier_kind);
5650 }
5651 
5652 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5653   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5654 }
5655 
5656 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5657   HandleFieldSet(instruction,
5658                  instruction->GetFieldInfo(),
5659                  instruction->GetValueCanBeNull(),
5660                  instruction->GetWriteBarrierKind());
5661 }
5662 
5663 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5664   HandleFieldGet(instruction);
5665 }
5666 
5667 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5668   HandleFieldGet(instruction, instruction->GetFieldInfo());
5669 }
5670 
5671 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5672   HandleFieldGet(instruction);
5673 }
5674 
5675 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5676   HandleFieldGet(instruction, instruction->GetFieldInfo());
5677 }
5678 
5679 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5680   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5681 }
5682 
5683 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5684   HandleFieldSet(instruction,
5685                  instruction->GetFieldInfo(),
5686                  instruction->GetValueCanBeNull(),
5687                  instruction->GetWriteBarrierKind());
5688 }
5689 
5690 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5691   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5692 }
5693 
5694 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5695   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5696   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction);
5697 }
5698 
5699 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5700     HUnresolvedInstanceFieldGet* instruction) {
5701   FieldAccessCallingConventionX86_64 calling_convention;
5702   codegen_->CreateUnresolvedFieldLocationSummary(
5703       instruction, instruction->GetFieldType(), calling_convention);
5704 }
5705 
5706 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5707     HUnresolvedInstanceFieldGet* instruction) {
5708   FieldAccessCallingConventionX86_64 calling_convention;
5709   codegen_->GenerateUnresolvedFieldAccess(instruction,
5710                                           instruction->GetFieldType(),
5711                                           instruction->GetFieldIndex(),
5712                                           calling_convention);
5713 }
5714 
5715 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5716     HUnresolvedInstanceFieldSet* instruction) {
5717   FieldAccessCallingConventionX86_64 calling_convention;
5718   codegen_->CreateUnresolvedFieldLocationSummary(
5719       instruction, instruction->GetFieldType(), calling_convention);
5720 }
5721 
5722 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5723     HUnresolvedInstanceFieldSet* instruction) {
5724   FieldAccessCallingConventionX86_64 calling_convention;
5725   codegen_->GenerateUnresolvedFieldAccess(instruction,
5726                                           instruction->GetFieldType(),
5727                                           instruction->GetFieldIndex(),
5728                                           calling_convention);
5729 }
5730 
5731 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5732     HUnresolvedStaticFieldGet* instruction) {
5733   FieldAccessCallingConventionX86_64 calling_convention;
5734   codegen_->CreateUnresolvedFieldLocationSummary(
5735       instruction, instruction->GetFieldType(), calling_convention);
5736 }
5737 
5738 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5739     HUnresolvedStaticFieldGet* instruction) {
5740   FieldAccessCallingConventionX86_64 calling_convention;
5741   codegen_->GenerateUnresolvedFieldAccess(instruction,
5742                                           instruction->GetFieldType(),
5743                                           instruction->GetFieldIndex(),
5744                                           calling_convention);
5745 }
5746 
5747 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5748     HUnresolvedStaticFieldSet* instruction) {
5749   FieldAccessCallingConventionX86_64 calling_convention;
5750   codegen_->CreateUnresolvedFieldLocationSummary(
5751       instruction, instruction->GetFieldType(), calling_convention);
5752 }
5753 
5754 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5755     HUnresolvedStaticFieldSet* instruction) {
5756   FieldAccessCallingConventionX86_64 calling_convention;
5757   codegen_->GenerateUnresolvedFieldAccess(instruction,
5758                                           instruction->GetFieldType(),
5759                                           instruction->GetFieldIndex(),
5760                                           calling_convention);
5761 }
5762 
5763 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5764   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5765   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5766       ? Location::RequiresRegister()
5767       : Location::Any();
5768   locations->SetInAt(0, loc);
5769 }
5770 
5771 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5772   if (CanMoveNullCheckToUser(instruction)) {
5773     return;
5774   }
5775   LocationSummary* locations = instruction->GetLocations();
5776   Location obj = locations->InAt(0);
5777 
5778   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
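       // The implicit null check is simply a load from offset 0 of the object: a null `obj`
       // faults, and the runtime's fault handler is expected to turn that fault into a
       // NullPointerException at the PC recorded below. RAX is just an arbitrary TEST operand;
       // only the memory access matters.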
5779   RecordPcInfo(instruction);
5780 }
5781 
5782 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5783   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5784   AddSlowPath(slow_path);
5785 
5786   LocationSummary* locations = instruction->GetLocations();
5787   Location obj = locations->InAt(0);
5788 
5789   if (obj.IsRegister()) {
5790     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5791   } else if (obj.IsStackSlot()) {
5792     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5793   } else {
5794     DCHECK(obj.IsConstant()) << obj;
5795     DCHECK(obj.GetConstant()->IsNullConstant());
5796     __ jmp(slow_path->GetEntryLabel());
5797     return;
5798   }
5799   __ j(kEqual, slow_path->GetEntryLabel());
5800 }
5801 
5802 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5803   codegen_->GenerateNullCheck(instruction);
5804 }
5805 
5806 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5807   bool object_array_get_with_read_barrier =
5808       (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5809   LocationSummary* locations =
5810       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5811                                                        object_array_get_with_read_barrier
5812                                                            ? LocationSummary::kCallOnSlowPath
5813                                                            : LocationSummary::kNoCall);
5814   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5815     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5816   }
5817   locations->SetInAt(0, Location::RequiresRegister());
5818   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5819   if (DataType::IsFloatingPointType(instruction->GetType())) {
5820     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5821   } else {
5822     // The output overlaps for an object array get when read barriers
5823     // are enabled: we do not want the move to overwrite the array's
5824     // location, as we need it to emit the read barrier.
5825     locations->SetOut(
5826         Location::RequiresRegister(),
5827         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5828   }
5829 }
5830 
5831 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5832   LocationSummary* locations = instruction->GetLocations();
5833   Location obj_loc = locations->InAt(0);
5834   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5835   Location index = locations->InAt(1);
5836   Location out_loc = locations->Out();
5837   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5838 
5839   DataType::Type type = instruction->GetType();
5840   if (type == DataType::Type::kReference) {
5841     static_assert(
5842         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5843         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5844     // /* HeapReference<Object> */ out =
5845     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5846     if (codegen_->EmitBakerReadBarrier()) {
5847       // Note that a potential implicit null check is handled in this
5848       // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5849       codegen_->GenerateArrayLoadWithBakerReadBarrier(
5850           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5851     } else {
5852       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5853       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5854       codegen_->MaybeRecordImplicitNullCheck(instruction);
5855       // If read barriers are enabled, emit read barriers other than
5856       // Baker's using a slow path (and also unpoison the loaded
5857       // reference, if heap poisoning is enabled).
5858       if (index.IsConstant()) {
5859         uint32_t offset =
5860             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5861         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5862       } else {
5863         codegen_->MaybeGenerateReadBarrierSlow(
5864             instruction, out_loc, out_loc, obj_loc, data_offset, index);
5865       }
5866     }
5867   } else {
5868     if (type == DataType::Type::kUint16
5869         && mirror::kUseStringCompression
5870         && instruction->IsStringCharAt()) {
5871       // Branch into compressed and uncompressed cases depending on the string's compression flag.
5872       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5873       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5874       NearLabel done, not_compressed;
5875       __ testb(Address(obj, count_offset), Immediate(1));
5876       codegen_->MaybeRecordImplicitNullCheck(instruction);
5877       static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5878                     "Expecting 0=compressed, 1=uncompressed");
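           // With string compression, the low bit of the count field is the compression flag and
           // the remaining bits hold the length: compressed strings store 8-bit characters
           // (indexed with TIMES_1 below), uncompressed strings store 16-bit characters (TIMES_2).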
5879       __ j(kNotZero, &not_compressed);
5880       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5881       __ jmp(&done);
5882       __ Bind(&not_compressed);
5883       __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5884       __ Bind(&done);
5885     } else {
5886       ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5887       Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5888       codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5889     }
5890     codegen_->MaybeRecordImplicitNullCheck(instruction);
5891   }
5892 }
5893 
5894 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5895   DataType::Type value_type = instruction->GetComponentType();
5896 
5897   WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5898   bool needs_write_barrier =
5899       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5900   bool check_gc_card =
5901       codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5902   bool needs_type_check = instruction->NeedsTypeCheck();
5903 
5904   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5905       instruction,
5906       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5907 
5908   locations->SetInAt(0, Location::RequiresRegister());
5909   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5910   if (DataType::IsFloatingPointType(value_type)) {
5911     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5912   } else {
5913     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5914   }
5915 
5916   if (needs_write_barrier || check_gc_card) {
5917     // Used by reference poisoning, type checking, emitting write barrier, or checking write
5918     // barrier.
5919     locations->AddTemp(Location::RequiresRegister());
5920     // Only used when emitting a write barrier, or when checking for the card table.
5921     locations->AddTemp(Location::RequiresRegister());
5922   } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5923              instruction->NeedsTypeCheck()) {
5924     // Used for poisoning or type checking.
5925     locations->AddTemp(Location::RequiresRegister());
5926   }
5927 }
5928 
5929 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5930   LocationSummary* locations = instruction->GetLocations();
5931   Location array_loc = locations->InAt(0);
5932   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5933   Location index = locations->InAt(1);
5934   Location value = locations->InAt(2);
5935   DataType::Type value_type = instruction->GetComponentType();
5936   bool needs_type_check = instruction->NeedsTypeCheck();
5937   const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5938   bool needs_write_barrier =
5939       codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5940 
5941   switch (value_type) {
5942     case DataType::Type::kBool:
5943     case DataType::Type::kUint8:
5944     case DataType::Type::kInt8: {
5945       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5946       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5947       if (value.IsRegister()) {
5948         __ movb(address, value.AsRegister<CpuRegister>());
5949       } else {
5950         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5951       }
5952       codegen_->MaybeRecordImplicitNullCheck(instruction);
5953       break;
5954     }
5955 
5956     case DataType::Type::kUint16:
5957     case DataType::Type::kInt16: {
5958       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5959       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5960       if (value.IsRegister()) {
5961         __ movw(address, value.AsRegister<CpuRegister>());
5962       } else {
5963         DCHECK(value.IsConstant()) << value;
5964         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5965       }
5966       codegen_->MaybeRecordImplicitNullCheck(instruction);
5967       break;
5968     }
5969 
5970     case DataType::Type::kReference: {
5971       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5972       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5973 
5974       if (!value.IsRegister()) {
5975         // Just setting null.
5976         DCHECK(instruction->InputAt(2)->IsNullConstant());
5977         DCHECK(value.IsConstant()) << value;
5978         __ movl(address, Immediate(0));
5979         codegen_->MaybeRecordImplicitNullCheck(instruction);
5980         if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5981           // We need to set a write barrier here even though we are writing null, since this write
5982           // barrier is being relied on.
5983           DCHECK(needs_write_barrier);
5984           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5985           CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5986           codegen_->MarkGCCard(temp, card, array);
5987         }
5988         DCHECK(!needs_type_check);
5989         break;
5990       }
5991 
5992       CpuRegister register_value = value.AsRegister<CpuRegister>();
5993       const bool can_value_be_null = instruction->GetValueCanBeNull();
5994       // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
5995       // value is null (without an extra CompareAndBranchIfZero since we already checked if the
5996       // value is null for the type check).
5997       const bool skip_marking_gc_card =
5998           can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
5999       NearLabel do_store;
6000       NearLabel skip_writing_card;
6001       if (can_value_be_null) {
6002         __ testl(register_value, register_value);
6003         if (skip_marking_gc_card) {
6004           __ j(kEqual, &skip_writing_card);
6005         } else {
6006           __ j(kEqual, &do_store);
6007         }
6008       }
6009 
6010       SlowPathCode* slow_path = nullptr;
6011       if (needs_type_check) {
6012         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
6013         codegen_->AddSlowPath(slow_path);
6014 
6015         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6016         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6017         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6018 
6019         // Note that when Baker read barriers are enabled, the type
6020         // checks are performed without read barriers.  This is fine,
6021         // even in the case where a class object is in the from-space
6022         // after the flip, as a comparison involving such a type would
6023         // not produce a false positive; it may of course produce a
6024         // false negative, in which case we would take the ArraySet
6025         // slow path.
6026 
6027         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6028         // /* HeapReference<Class> */ temp = array->klass_
6029         __ movl(temp, Address(array, class_offset));
6030         codegen_->MaybeRecordImplicitNullCheck(instruction);
6031         __ MaybeUnpoisonHeapReference(temp);
6032 
6033         // /* HeapReference<Class> */ temp = temp->component_type_
6034         __ movl(temp, Address(temp, component_offset));
6035         // If heap poisoning is enabled, no need to unpoison `temp`
6036         // nor the object reference in `register_value->klass`, as
6037         // we are comparing two poisoned references.
6038         __ cmpl(temp, Address(register_value, class_offset));
6039 
6040         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6041           NearLabel do_put;
6042           __ j(kEqual, &do_put);
6043           // If heap poisoning is enabled, the `temp` reference has
6044           // not been unpoisoned yet; unpoison it now.
6045           __ MaybeUnpoisonHeapReference(temp);
6046 
6047           // If heap poisoning is enabled, no need to unpoison the
6048           // heap reference loaded below, as it is only used for a
6049           // comparison with null.
6050           __ cmpl(Address(temp, super_offset), Immediate(0));
6051           __ j(kNotEqual, slow_path->GetEntryLabel());
6052           __ Bind(&do_put);
6053         } else {
6054           __ j(kNotEqual, slow_path->GetEntryLabel());
6055         }
6056       }
6057 
6058       if (can_value_be_null && !skip_marking_gc_card) {
6059         DCHECK(do_store.IsLinked());
6060         __ Bind(&do_store);
6061       }
6062 
6063       if (needs_write_barrier) {
6064         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6065         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6066         codegen_->MarkGCCard(temp, card, array);
6067       } else if (codegen_->ShouldCheckGCCard(
6068                      value_type, instruction->GetValue(), write_barrier_kind)) {
6069         CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6070         CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6071         codegen_->CheckGCCardIsValid(temp, card, array);
6072       }
6073 
6074       if (skip_marking_gc_card) {
6075         // Note that we don't check that the GC card is valid as it can be correctly clean.
6076         DCHECK(skip_writing_card.IsLinked());
6077         __ Bind(&skip_writing_card);
6078       }
6079 
6080       Location source = value;
6081       if (kPoisonHeapReferences) {
6082         Location temp_loc = locations->GetTemp(0);
6083         CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6084         __ movl(temp, register_value);
6085         __ PoisonHeapReference(temp);
6086         source = temp_loc;
6087       }
6088 
6089       __ movl(address, source.AsRegister<CpuRegister>());
6090 
6091       if (can_value_be_null || !needs_type_check) {
6092         codegen_->MaybeRecordImplicitNullCheck(instruction);
6093       }
6094 
6095       if (slow_path != nullptr) {
6096         __ Bind(slow_path->GetExitLabel());
6097       }
6098 
6099       break;
6100     }
6101 
6102     case DataType::Type::kInt32: {
6103       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6104       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6105       if (value.IsRegister()) {
6106         __ movl(address, value.AsRegister<CpuRegister>());
6107       } else {
6108         DCHECK(value.IsConstant()) << value;
6109         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6110         __ movl(address, Immediate(v));
6111       }
6112       codegen_->MaybeRecordImplicitNullCheck(instruction);
6113       break;
6114     }
6115 
6116     case DataType::Type::kInt64: {
6117       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6118       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6119       if (value.IsRegister()) {
6120         __ movq(address, value.AsRegister<CpuRegister>());
6121         codegen_->MaybeRecordImplicitNullCheck(instruction);
6122       } else {
6123         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6124         Address address_high =
6125             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6126         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6127       }
6128       break;
6129     }
6130 
6131     case DataType::Type::kFloat32: {
6132       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6133       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6134       if (value.IsFpuRegister()) {
6135         __ movss(address, value.AsFpuRegister<XmmRegister>());
6136       } else {
6137         DCHECK(value.IsConstant());
6138         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6139         __ movl(address, Immediate(v));
6140       }
6141       codegen_->MaybeRecordImplicitNullCheck(instruction);
6142       break;
6143     }
6144 
6145     case DataType::Type::kFloat64: {
6146       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6147       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6148       if (value.IsFpuRegister()) {
6149         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6150         codegen_->MaybeRecordImplicitNullCheck(instruction);
6151       } else {
6152         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6153         Address address_high =
6154             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6155         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6156       }
6157       break;
6158     }
6159 
6160     case DataType::Type::kUint32:
6161     case DataType::Type::kUint64:
6162     case DataType::Type::kVoid:
6163       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6164       UNREACHABLE();
6165   }
6166 }
6167 
6168 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6169   LocationSummary* locations =
6170       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6171   locations->SetInAt(0, Location::RequiresRegister());
6172   if (!instruction->IsEmittedAtUseSite()) {
6173     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6174   }
6175 }
6176 
6177 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6178   if (instruction->IsEmittedAtUseSite()) {
6179     return;
6180   }
6181 
6182   LocationSummary* locations = instruction->GetLocations();
6183   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6184   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6185   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6186   __ movl(out, Address(obj, offset));
6187   codegen_->MaybeRecordImplicitNullCheck(instruction);
6188   // Shift out the string-compression flag in case the array is a String's backing char array.
6189   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6190     __ shrl(out, Immediate(1));
6191   }
6192 }
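// Note on VisitArrayLength above (illustrative): with string compression enabled, the count
// field is assumed to encode (length << 1) | compression_flag, so the single `shrl` by one
// recovers the character count whether or not the String is stored compressed.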
6193 
6194 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6195   RegisterSet caller_saves = RegisterSet::Empty();
6196   InvokeRuntimeCallingConvention calling_convention;
6197   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6198   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6199   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6200   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6201   HInstruction* length = instruction->InputAt(1);
6202   if (!length->IsEmittedAtUseSite()) {
6203     locations->SetInAt(1, Location::RegisterOrConstant(length));
6204   }
6205 }
6206 
6207 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6208   LocationSummary* locations = instruction->GetLocations();
6209   Location index_loc = locations->InAt(0);
6210   Location length_loc = locations->InAt(1);
6211   SlowPathCode* slow_path =
6212       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6213 
6214   if (length_loc.IsConstant()) {
6215     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6216     if (index_loc.IsConstant()) {
6217       // BCE will remove the bounds check if we are guaranteed to pass.
6218       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6219       if (index < 0 || index >= length) {
6220         codegen_->AddSlowPath(slow_path);
6221         __ jmp(slow_path->GetEntryLabel());
6222       } else {
6223         // Some optimization after BCE may have generated this, and we should not
6224         // generate a bounds check since the index is statically within bounds.
6225       }
6226       return;
6227     }
6228 
6229     // We have to reverse the jump condition because the length is the constant.
6230     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6231     __ cmpl(index_reg, Immediate(length));
6232     codegen_->AddSlowPath(slow_path);
6233     __ j(kAboveEqual, slow_path->GetEntryLabel());
6234   } else {
6235     HInstruction* array_length = instruction->InputAt(1);
6236     if (array_length->IsEmittedAtUseSite()) {
6237       // Address the length field in the array.
6238       DCHECK(array_length->IsArrayLength());
6239       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6240       Location array_loc = array_length->GetLocations()->InAt(0);
6241       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6242       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6243         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6244         // the string compression flag) with the in-memory length and avoid the temporary.
6245         CpuRegister length_reg = CpuRegister(TMP);
6246         __ movl(length_reg, array_len);
6247         codegen_->MaybeRecordImplicitNullCheck(array_length);
6248         __ shrl(length_reg, Immediate(1));
6249         codegen_->GenerateIntCompare(length_reg, index_loc);
6250       } else {
6251         // Check the bound for the general case: a char array, or a String's
6252         // backing array when the compression feature is off.
6253         if (index_loc.IsConstant()) {
6254           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6255           __ cmpl(array_len, Immediate(value));
6256         } else {
6257           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6258         }
6259         codegen_->MaybeRecordImplicitNullCheck(array_length);
6260       }
6261     } else {
6262       codegen_->GenerateIntCompare(length_loc, index_loc);
6263     }
6264     codegen_->AddSlowPath(slow_path);
6265     __ j(kBelowEqual, slow_path->GetEntryLabel());
6266   }
6267 }
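// Illustrative sketch of the usual sequence emitted by VisitBoundsCheck above when both index
// and length are in registers (pseudo-assembly, for exposition only):
//   cmpl <length>, <index>          // unsigned comparison
//   jbe  BoundsCheckSlowPathX86_64  // taken when length <= index, which also catches a
//                                   // negative index interpreted as a large unsigned value
// When the length is a constant, the operand order is reversed and `jae` is used instead.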
6268 
6269 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6270                                           CpuRegister card,
6271                                           CpuRegister object,
6272                                           CpuRegister value,
6273                                           bool emit_null_check) {
6274   NearLabel is_null;
6275   if (emit_null_check) {
6276     __ testl(value, value);
6277     __ j(kEqual, &is_null);
6278   }
6279   MarkGCCard(temp, card, object);
6280   if (emit_null_check) {
6281     __ Bind(&is_null);
6282   }
6283 }
6284 
6285 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6286   // Load the address of the card table into `card`.
6287   __ gs()->movq(card,
6288                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6289                                   /* no_rip= */ true));
6290   // Calculate the offset (in the card table) of the card corresponding to `object`.
6291   __ movq(temp, object);
6292   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6293   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6294   // `object`'s card.
6295   //
6296   // Register `card` contains the address of the card table. Note that the card
6297   // table's base is biased during its creation so that it always starts at an
6298   // address whose least-significant byte is equal to `kCardDirty` (see
6299   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6300   // below writes the `kCardDirty` (byte) value into the `object`'s card
6301   // (located at `card + object >> kCardShift`).
6302   //
6303   // This dual use of the value in register `card` (1. to calculate the location
6304   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6305   // (no need to explicitly load `kCardDirty` as an immediate value).
6306   __ movb(Address(temp, card, TIMES_1, 0), card);
6307 }
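// Worked example for MarkGCCard above (illustrative; the exact constants come from
// gc::accounting::CardTable): assuming 1 KiB cards, every object whose address falls in the
// same aligned 1 KiB region maps to the same card byte at biased_begin + (address >> kCardShift).
// Because the table's biased base is chosen so that its least-significant byte equals
// kCardDirty, the final `movb` both addresses that card and stores the dirty value without a
// separate immediate load.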
6308 
6309 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6310                                              CpuRegister card,
6311                                              CpuRegister object) {
6312   NearLabel done;
6313   // Load the address of the card table into `card`.
6314   __ gs()->movq(card,
6315                 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6316                                   /* no_rip= */ true));
6317   // Calculate the offset (in the card table) of the card corresponding to `object`.
6318   __ movq(temp, object);
6319   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6320   // assert (!clean || !self->is_gc_marking)
6321   __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6322   __ j(kNotEqual, &done);
6323   __ gs()->cmpl(
6324       Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6325       Immediate(0));
6326   __ j(kEqual, &done);
6327   __ int3();
6328   __ Bind(&done);
6329 }
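// Note on CheckGCCardIsValid above (illustrative): the `int3` is a debug trap that is only
// reached when the card is clean while the GC is concurrently marking, i.e. when the asserted
// invariant (!clean || !self->is_gc_marking) is violated.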
6330 
6331 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6332   LOG(FATAL) << "Unimplemented";
6333 }
6334 
6335 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6336   if (instruction->GetNext()->IsSuspendCheck() &&
6337       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6338     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6339     // The back edge will generate the suspend check.
6340     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6341   }
6342 
6343   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6344 }
6345 
6346 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6347   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6348       instruction, LocationSummary::kCallOnSlowPath);
6349   // In the suspend check slow path, usually there are no caller-save registers at all.
6350   // If SIMD instructions are present, however, we force spilling all live SIMD
6351   // registers in full width (since the runtime only saves/restores the lower part).
6352   locations->SetCustomSlowPathCallerSaves(
6353       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6354 }
6355 
6356 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6357   HBasicBlock* block = instruction->GetBlock();
6358   if (block->GetLoopInformation() != nullptr) {
6359     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6360     // The back edge will generate the suspend check.
6361     return;
6362   }
6363   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6364     // The goto will generate the suspend check.
6365     return;
6366   }
6367   GenerateSuspendCheck(instruction, nullptr);
6368 }
6369 
6370 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6371                                                           HBasicBlock* successor) {
6372   SuspendCheckSlowPathX86_64* slow_path =
6373       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6374   if (slow_path == nullptr) {
6375     slow_path =
6376         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6377     instruction->SetSlowPath(slow_path);
6378     codegen_->AddSlowPath(slow_path);
6379     if (successor != nullptr) {
6380       DCHECK(successor->IsLoopHeader());
6381     }
6382   } else {
6383     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6384   }
6385 
6386   __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6387                                    /* no_rip= */ true),
6388                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6389   if (successor == nullptr) {
6390     __ j(kNotZero, slow_path->GetEntryLabel());
6391     __ Bind(slow_path->GetReturnLabel());
6392   } else {
6393     __ j(kZero, codegen_->GetLabelOf(successor));
6394     __ jmp(slow_path->GetEntryLabel());
6395   }
6396 }
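// Illustrative shape of the code emitted by GenerateSuspendCheck above (pseudo-assembly; the
// offset and flag mask come from the Thread layout):
//   testl gs:[thread_flags_offset], SuspendOrCheckpointRequestFlags
//   jnz   SuspendCheckSlowPathX86_64   // no successor: fall through, return label bound here
// With an explicit successor (a loop back edge), the polarity is inverted: `jz` branches to the
// successor and an unconditional `jmp` enters the slow path.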
6397 
6398 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6399   return codegen_->GetAssembler();
6400 }
6401 
6402 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6403   MoveOperands* move = moves_[index];
6404   Location source = move->GetSource();
6405   Location destination = move->GetDestination();
6406 
6407   if (source.IsRegister()) {
6408     if (destination.IsRegister()) {
6409       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6410     } else if (destination.IsStackSlot()) {
6411       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6412               source.AsRegister<CpuRegister>());
6413     } else {
6414       DCHECK(destination.IsDoubleStackSlot());
6415       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6416               source.AsRegister<CpuRegister>());
6417     }
6418   } else if (source.IsStackSlot()) {
6419     if (destination.IsRegister()) {
6420       __ movl(destination.AsRegister<CpuRegister>(),
6421               Address(CpuRegister(RSP), source.GetStackIndex()));
6422     } else if (destination.IsFpuRegister()) {
6423       __ movss(destination.AsFpuRegister<XmmRegister>(),
6424               Address(CpuRegister(RSP), source.GetStackIndex()));
6425     } else {
6426       DCHECK(destination.IsStackSlot());
6427       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6428       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6429     }
6430   } else if (source.IsDoubleStackSlot()) {
6431     if (destination.IsRegister()) {
6432       __ movq(destination.AsRegister<CpuRegister>(),
6433               Address(CpuRegister(RSP), source.GetStackIndex()));
6434     } else if (destination.IsFpuRegister()) {
6435       __ movsd(destination.AsFpuRegister<XmmRegister>(),
6436                Address(CpuRegister(RSP), source.GetStackIndex()));
6437     } else {
6438       DCHECK(destination.IsDoubleStackSlot()) << destination;
6439       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6440       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6441     }
6442   } else if (source.IsSIMDStackSlot()) {
6443     if (destination.IsFpuRegister()) {
6444       __ movups(destination.AsFpuRegister<XmmRegister>(),
6445                 Address(CpuRegister(RSP), source.GetStackIndex()));
6446     } else {
6447       DCHECK(destination.IsSIMDStackSlot());
6448       size_t high = kX86_64WordSize;
6449       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6450       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6451       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6452       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6453     }
6454   } else if (source.IsConstant()) {
6455     HConstant* constant = source.GetConstant();
6456     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6457       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6458       if (destination.IsRegister()) {
6459         if (value == 0) {
6460           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6461         } else {
6462           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6463         }
6464       } else {
6465         DCHECK(destination.IsStackSlot()) << destination;
6466         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6467       }
6468     } else if (constant->IsLongConstant()) {
6469       int64_t value = constant->AsLongConstant()->GetValue();
6470       if (destination.IsRegister()) {
6471         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6472       } else {
6473         DCHECK(destination.IsDoubleStackSlot()) << destination;
6474         codegen_->Store64BitValueToStack(destination, value);
6475       }
6476     } else if (constant->IsFloatConstant()) {
6477       float fp_value = constant->AsFloatConstant()->GetValue();
6478       if (destination.IsFpuRegister()) {
6479         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6480         codegen_->Load32BitValue(dest, fp_value);
6481       } else {
6482         DCHECK(destination.IsStackSlot()) << destination;
6483         Immediate imm(bit_cast<int32_t, float>(fp_value));
6484         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6485       }
6486     } else {
6487       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6488       double fp_value = constant->AsDoubleConstant()->GetValue();
6489       int64_t value = bit_cast<int64_t, double>(fp_value);
6490       if (destination.IsFpuRegister()) {
6491         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6492         codegen_->Load64BitValue(dest, fp_value);
6493       } else {
6494         DCHECK(destination.IsDoubleStackSlot()) << destination;
6495         codegen_->Store64BitValueToStack(destination, value);
6496       }
6497     }
6498   } else if (source.IsFpuRegister()) {
6499     if (destination.IsFpuRegister()) {
6500       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6501     } else if (destination.IsStackSlot()) {
6502       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6503                source.AsFpuRegister<XmmRegister>());
6504     } else if (destination.IsDoubleStackSlot()) {
6505       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6506                source.AsFpuRegister<XmmRegister>());
6507     } else {
6508       DCHECK(destination.IsSIMDStackSlot());
6509       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6510                 source.AsFpuRegister<XmmRegister>());
6511     }
6512   }
6513 }
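// Note on EmitMove above (illustrative): x86-64 has no memory-to-memory move, so stack-to-stack
// copies go through the TMP scratch register (one movl/movq round trip per word, and two such
// round trips for a 128-bit SIMD slot).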
6514 
6515 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6516   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6517   __ movl(Address(CpuRegister(RSP), mem), reg);
6518   __ movl(reg, CpuRegister(TMP));
6519 }
6520 
6521 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6522   __ movq(CpuRegister(TMP), reg1);
6523   __ movq(reg1, reg2);
6524   __ movq(reg2, CpuRegister(TMP));
6525 }
6526 
6527 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6528   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6529   __ movq(Address(CpuRegister(RSP), mem), reg);
6530   __ movq(reg, CpuRegister(TMP));
6531 }
6532 
6533 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6534   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6535   __ movss(Address(CpuRegister(RSP), mem), reg);
6536   __ movd(reg, CpuRegister(TMP));
6537 }
6538 
6539 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6540   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6541   __ movsd(Address(CpuRegister(RSP), mem), reg);
6542   __ movq(reg, CpuRegister(TMP));
6543 }
6544 
6545 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6546   size_t extra_slot = 2 * kX86_64WordSize;
6547   __ subq(CpuRegister(RSP), Immediate(extra_slot));
6548   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6549   ExchangeMemory64(0, mem + extra_slot, 2);
6550   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6551   __ addq(CpuRegister(RSP), Immediate(extra_slot));
6552 }
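// Note on Exchange128 above (illustrative): with no 128-bit scratch register available, the XMM
// register is spilled to a temporary 16-byte stack area, that copy is swapped with the target
// slot qword by qword via ExchangeMemory64, and the register is then reloaded; `extra_slot`
// compensates for RSP having been moved down while the swap runs.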
6553 
6554 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6555   ScratchRegisterScope ensure_scratch(
6556       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6557 
6558   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6559   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6560   __ movl(CpuRegister(ensure_scratch.GetRegister()),
6561           Address(CpuRegister(RSP), mem2 + stack_offset));
6562   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6563   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6564           CpuRegister(ensure_scratch.GetRegister()));
6565 }
6566 
6567 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6568   ScratchRegisterScope ensure_scratch(
6569       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6570 
6571   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6572 
6573   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6574   for (int i = 0; i < num_of_qwords; i++) {
6575     __ movq(CpuRegister(TMP),
6576             Address(CpuRegister(RSP), mem1 + stack_offset));
6577     __ movq(CpuRegister(ensure_scratch.GetRegister()),
6578             Address(CpuRegister(RSP), mem2 + stack_offset));
6579     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6580             CpuRegister(TMP));
6581     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6582             CpuRegister(ensure_scratch.GetRegister()));
6583     stack_offset += kX86_64WordSize;
6584   }
6585 }
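// Note on ExchangeMemory32/64 above (illustrative): ScratchRegisterScope may need to spill RAX
// to obtain a second scratch register; IsSpilled() reports that case and `stack_offset`
// compensates for the extra push so that mem1/mem2 still address the intended slots.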
6586 
6587 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6588   MoveOperands* move = moves_[index];
6589   Location source = move->GetSource();
6590   Location destination = move->GetDestination();
6591 
6592   if (source.IsRegister() && destination.IsRegister()) {
6593     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6594   } else if (source.IsRegister() && destination.IsStackSlot()) {
6595     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6596   } else if (source.IsStackSlot() && destination.IsRegister()) {
6597     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6598   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6599     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6600   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6601     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6602   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6603     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6604   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6605     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6606   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6607     __ movq(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6608     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6609     __ movq(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6610   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6611     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6612   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6613     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6614   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6615     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6616   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6617     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6618   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6619     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6620   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6621     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6622   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6623     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6624   } else {
6625     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6626   }
6627 }
6628 
6629 
6630 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6631   __ pushq(CpuRegister(reg));
6632 }
6633 
6634 
6635 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6636   __ popq(CpuRegister(reg));
6637 }
6638 
6639 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6640     SlowPathCode* slow_path, CpuRegister class_reg) {
6641   __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6642   __ j(kBelow, slow_path->GetEntryLabel());
6643   __ Bind(slow_path->GetExitLabel());
6644 }
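// Note on GenerateClassInitializationCheck above (illustrative): the class status byte is laid
// out so that a single unsigned compare suffices; any value below the shifted
// "visibly initialized" marker means the class may still need initialization, so `jb` enters
// the slow path and the exit label bound right after it is the fast-path continuation.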
6645 
6646 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6647                                                                        CpuRegister temp) {
6648   uint32_t path_to_root = check->GetBitstringPathToRoot();
6649   uint32_t mask = check->GetBitstringMask();
6650   DCHECK(IsPowerOfTwo(mask + 1));
6651   size_t mask_bits = WhichPowerOf2(mask + 1);
6652 
6653   if (mask_bits == 16u) {
6654     // Compare the bitstring in memory.
6655     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6656   } else {
6657     // /* uint32_t */ temp = temp->status_
6658     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6659     // Compare the bitstring bits using SUB.
6660     __ subl(temp, Immediate(path_to_root));
6661     // Shift out bits that do not contribute to the comparison.
6662     __ shll(temp, Immediate(32u - mask_bits));
6663   }
6664 }
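// Illustrative example for GenerateBitstringTypeCheckCompare above (assumed values, for
// exposition only): with mask == 0xFFF (mask_bits == 12), the subl/shll pair leaves the zero
// flag set exactly when (status & 0xFFF) == path_to_root, since shifting left by 20 discards
// everything but the low 12 bits of the difference. Callers test the zero flag afterwards.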
6665 
6666 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6667     HLoadClass::LoadKind desired_class_load_kind) {
6668   switch (desired_class_load_kind) {
6669     case HLoadClass::LoadKind::kInvalid:
6670       LOG(FATAL) << "UNREACHABLE";
6671       UNREACHABLE();
6672     case HLoadClass::LoadKind::kReferrersClass:
6673       break;
6674     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6675     case HLoadClass::LoadKind::kBootImageRelRo:
6676     case HLoadClass::LoadKind::kAppImageRelRo:
6677     case HLoadClass::LoadKind::kBssEntry:
6678     case HLoadClass::LoadKind::kBssEntryPublic:
6679     case HLoadClass::LoadKind::kBssEntryPackage:
6680       DCHECK(!GetCompilerOptions().IsJitCompiler());
6681       break;
6682     case HLoadClass::LoadKind::kJitBootImageAddress:
6683     case HLoadClass::LoadKind::kJitTableAddress:
6684       DCHECK(GetCompilerOptions().IsJitCompiler());
6685       break;
6686     case HLoadClass::LoadKind::kRuntimeCall:
6687       break;
6688   }
6689   return desired_class_load_kind;
6690 }
6691 
6692 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6693   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6694   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6695     // Custom calling convention: RAX serves as both input and output.
6696     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6697         cls,
6698         Location::RegisterLocation(RAX),
6699         Location::RegisterLocation(RAX));
6700     return;
6701   }
6702   DCHECK_EQ(cls->NeedsAccessCheck(),
6703             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6704                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6705 
6706   const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6707   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6708       ? LocationSummary::kCallOnSlowPath
6709       : LocationSummary::kNoCall;
6710   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6711   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6712     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6713   }
6714 
6715   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6716     locations->SetInAt(0, Location::RequiresRegister());
6717   }
6718   locations->SetOut(Location::RequiresRegister());
6719   if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6720       load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6721       load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6722     if (codegen_->EmitNonBakerReadBarrier()) {
6723       // For non-Baker read barrier we have a temp-clobbering call.
6724     } else {
6725       // Rely on the type resolution and/or initialization to save everything.
6726       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6727     }
6728   }
6729 }
6730 
6731 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6732                                                  dex::TypeIndex type_index,
6733                                                  Handle<mirror::Class> handle) {
6734   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6735   // Add a patch entry and return the label.
6736   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6737   PatchInfo<Label>* info = &jit_class_patches_.back();
6738   return &info->label;
6739 }
6740 
6741 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6742 // move.
6743 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6744   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6745   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6746     codegen_->GenerateLoadClassRuntimeCall(cls);
6747     return;
6748   }
6749   DCHECK_EQ(cls->NeedsAccessCheck(),
6750             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6751                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6752 
6753   LocationSummary* locations = cls->GetLocations();
6754   Location out_loc = locations->Out();
6755   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6756 
6757   const ReadBarrierOption read_barrier_option =
6758       cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6759   bool generate_null_check = false;
6760   switch (load_kind) {
6761     case HLoadClass::LoadKind::kReferrersClass: {
6762       DCHECK(!cls->CanCallRuntime());
6763       DCHECK(!cls->MustGenerateClinitCheck());
6764       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6765       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6766       GenerateGcRootFieldLoad(
6767           cls,
6768           out_loc,
6769           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6770           /* fixup_label= */ nullptr,
6771           read_barrier_option);
6772       break;
6773     }
6774     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6775       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6776              codegen_->GetCompilerOptions().IsBootImageExtension());
6777       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6778       __ leal(out,
6779               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6780       codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6781       break;
6782     case HLoadClass::LoadKind::kBootImageRelRo: {
6783       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6784       __ movl(out,
6785               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6786       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6787       break;
6788     }
6789     case HLoadClass::LoadKind::kAppImageRelRo: {
6790       DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6791       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6792       __ movl(out,
6793               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6794       codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6795       break;
6796     }
6797     case HLoadClass::LoadKind::kBssEntry:
6798     case HLoadClass::LoadKind::kBssEntryPublic:
6799     case HLoadClass::LoadKind::kBssEntryPackage: {
6800       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6801                                           /* no_rip= */ false);
6802       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6803       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6804       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6805       // No need for memory fence, thanks to the x86-64 memory model.
6806       generate_null_check = true;
6807       break;
6808     }
6809     case HLoadClass::LoadKind::kJitBootImageAddress: {
6810       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6811       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6812       DCHECK_NE(address, 0u);
6813       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6814       break;
6815     }
6816     case HLoadClass::LoadKind::kJitTableAddress: {
6817       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6818                                           /* no_rip= */ true);
6819       Label* fixup_label =
6820           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6821       // /* GcRoot<mirror::Class> */ out = *address
6822       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6823       break;
6824     }
6825     default:
6826       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6827       UNREACHABLE();
6828   }
6829 
6830   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6831     DCHECK(cls->CanCallRuntime());
6832     SlowPathCode* slow_path =
6833         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6834     codegen_->AddSlowPath(slow_path);
6835     if (generate_null_check) {
6836       __ testl(out, out);
6837       __ j(kEqual, slow_path->GetEntryLabel());
6838     }
6839     if (cls->MustGenerateClinitCheck()) {
6840       GenerateClassInitializationCheck(slow_path, out);
6841     } else {
6842       __ Bind(slow_path->GetExitLabel());
6843     }
6844   }
6845 }
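// Note on VisitLoadClass above (illustrative): for the BSS-entry load kinds the slow path is
// entered when the loaded GcRoot is still null (type not yet resolved); when a clinit check is
// also required, the same LoadClassSlowPathX86_64 handles the initialization as well.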
6846 
6847 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6848   LocationSummary* locations =
6849       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6850   locations->SetInAt(0, Location::RequiresRegister());
6851   if (check->HasUses()) {
6852     locations->SetOut(Location::SameAsFirstInput());
6853   }
6854   // Rely on the type initialization to save everything we need.
6855   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6856 }
6857 
6858 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6859   // Custom calling convention: RAX serves as both input and output.
6860   Location location = Location::RegisterLocation(RAX);
6861   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6862 }
6863 
6864 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6865   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6866 }
6867 
6868 Label* CodeGeneratorX86_64::NewJitRootMethodTypePatch(const DexFile& dex_file,
6869                                                       dex::ProtoIndex proto_index,
6870                                                       Handle<mirror::MethodType> handle) {
6871   ReserveJitMethodTypeRoot(ProtoReference(&dex_file, proto_index), handle);
6872   // Add a patch entry and return the label.
6873   jit_method_type_patches_.emplace_back(&dex_file, proto_index.index_);
6874   PatchInfo<Label>* info = &jit_method_type_patches_.back();
6875   return &info->label;
6876 }
6877 
6878 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6879   LocationSummary* locations =
6880       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6881   if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6882     Location location = Location::RegisterLocation(RAX);
6883     CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6884   } else {
6885     locations->SetOut(Location::RequiresRegister());
6886     if (load->GetLoadKind() == HLoadMethodType::LoadKind::kBssEntry) {
6887       if (codegen_->EmitNonBakerReadBarrier()) {
6888         // For non-Baker read barrier we have a temp-clobbering call.
6889       } else {
6890         // Rely on the pResolveMethodType to save everything.
6891         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6892       }
6893     }
6894   }
6895 }
6896 
6897 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6898   LocationSummary* locations = load->GetLocations();
6899   Location out_loc = locations->Out();
6900   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6901 
6902   switch (load->GetLoadKind()) {
6903     case HLoadMethodType::LoadKind::kBssEntry: {
6904       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6905                                           /* no_rip= */ false);
6906       Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6907       // /* GcRoot<mirror::MethodType> */ out = *address  /* PC-relative */
6908       GenerateGcRootFieldLoad(
6909           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6910       // No need for memory fence, thanks to the x86-64 memory model.
6911       SlowPathCode* slow_path =
6912           new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6913       codegen_->AddSlowPath(slow_path);
6914       __ testl(out, out);
6915       __ j(kEqual, slow_path->GetEntryLabel());
6916       __ Bind(slow_path->GetExitLabel());
6917       return;
6918     }
6919     case HLoadMethodType::LoadKind::kJitTableAddress: {
6920       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6921                                           /* no_rip= */ true);
6922       Handle<mirror::MethodType> method_type = load->GetMethodType();
6923       DCHECK(method_type != nullptr);
6924       Label* fixup_label = codegen_->NewJitRootMethodTypePatch(
6925           load->GetDexFile(), load->GetProtoIndex(), method_type);
6926       GenerateGcRootFieldLoad(
6927           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6928       return;
6929     }
6930     default:
6931       DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6932       codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6933       break;
6934   }
6935 }
6936 
6937 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6938   // We assume the class is not null.
6939   SlowPathCode* slow_path =
6940       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6941   codegen_->AddSlowPath(slow_path);
6942   GenerateClassInitializationCheck(slow_path,
6943                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6944 }
6945 
6946 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6947     HLoadString::LoadKind desired_string_load_kind) {
6948   switch (desired_string_load_kind) {
6949     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6950     case HLoadString::LoadKind::kBootImageRelRo:
6951     case HLoadString::LoadKind::kBssEntry:
6952       DCHECK(!GetCompilerOptions().IsJitCompiler());
6953       break;
6954     case HLoadString::LoadKind::kJitBootImageAddress:
6955     case HLoadString::LoadKind::kJitTableAddress:
6956       DCHECK(GetCompilerOptions().IsJitCompiler());
6957       break;
6958     case HLoadString::LoadKind::kRuntimeCall:
6959       break;
6960   }
6961   return desired_string_load_kind;
6962 }
6963 
6964 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6965   LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6966   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6967   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6968     locations->SetOut(Location::RegisterLocation(RAX));
6969   } else {
6970     locations->SetOut(Location::RequiresRegister());
6971     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6972       if (codegen_->EmitNonBakerReadBarrier()) {
6973         // For non-Baker read barrier we have a temp-clobbering call.
6974       } else {
6975         // Rely on the pResolveString to save everything.
6976         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6977       }
6978     }
6979   }
6980 }
6981 
6982 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6983                                                   dex::StringIndex string_index,
6984                                                   Handle<mirror::String> handle) {
6985   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6986   // Add a patch entry and return the label.
6987   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6988   PatchInfo<Label>* info = &jit_string_patches_.back();
6989   return &info->label;
6990 }
6991 
6992 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6993 // move.
6994 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6995   LocationSummary* locations = load->GetLocations();
6996   Location out_loc = locations->Out();
6997   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6998 
6999   switch (load->GetLoadKind()) {
7000     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7001       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7002              codegen_->GetCompilerOptions().IsBootImageExtension());
7003       __ leal(out,
7004               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7005       codegen_->RecordBootImageStringPatch(load);
7006       return;
7007     }
7008     case HLoadString::LoadKind::kBootImageRelRo: {
7009       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7010       __ movl(out,
7011               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7012       codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
7013       return;
7014     }
7015     case HLoadString::LoadKind::kBssEntry: {
7016       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7017                                           /* no_rip= */ false);
7018       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7019       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
7020       GenerateGcRootFieldLoad(
7021           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7022       // No need for memory fence, thanks to the x86-64 memory model.
7023       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
7024       codegen_->AddSlowPath(slow_path);
7025       __ testl(out, out);
7026       __ j(kEqual, slow_path->GetEntryLabel());
7027       __ Bind(slow_path->GetExitLabel());
7028       return;
7029     }
7030     case HLoadString::LoadKind::kJitBootImageAddress: {
7031       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7032       DCHECK_NE(address, 0u);
7033       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
7034       return;
7035     }
7036     case HLoadString::LoadKind::kJitTableAddress: {
7037       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7038                                           /* no_rip= */ true);
7039       Label* fixup_label = codegen_->NewJitRootStringPatch(
7040           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7041       // /* GcRoot<mirror::String> */ out = *address
7042       GenerateGcRootFieldLoad(
7043           load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7044       return;
7045     }
7046     default:
7047       break;
7048   }
7049 
7050   // Custom calling convention: RAX serves as both input and output.
7051   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
7052   codegen_->InvokeRuntime(kQuickResolveString, load);
7053   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7054 }
7055 
7056 static Address GetExceptionTlsAddress() {
7057   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
7058                            /* no_rip= */ true);
7059 }
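// Note on GetExceptionTlsAddress above (illustrative): with `no_rip == true` the absolute
// displacement is combined with the GS segment override at the call sites below, so the
// load/store effectively accesses Thread::Current()->exception_ in thread-local storage.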
7060 
7061 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7062   LocationSummary* locations =
7063       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7064   locations->SetOut(Location::RequiresRegister());
7065 }
7066 
7067 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7068   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7069 }
7070 
7071 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7072   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7073 }
7074 
7075 void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7076   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7077 }
7078 
7079 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7080   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7081       instruction, LocationSummary::kCallOnMainOnly);
7082   InvokeRuntimeCallingConvention calling_convention;
7083   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7084 }
7085 
7086 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7087   codegen_->InvokeRuntime(kQuickDeliverException, instruction);
7088   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7089 }
7090 
7091 // Temp is used for read barrier.
7092 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7093   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7094     return 1;
7095   }
7096   if (emit_read_barrier &&
7097       !kUseBakerReadBarrier &&
7098       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7099        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7100        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7101     return 1;
7102   }
7103   return 0;
7104 }
7105 
7106 // The interface case has 2 temps: one for holding the number of interfaces and one for the
7107 // current interface pointer; the current interface is compared in memory.
7108 // The other checks have one temp for loading the object's class.
7109 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7110   return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7111 }
7112 
7113 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7114   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7115   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7116   bool baker_read_barrier_slow_path = false;
7117   switch (type_check_kind) {
7118     case TypeCheckKind::kExactCheck:
7119     case TypeCheckKind::kAbstractClassCheck:
7120     case TypeCheckKind::kClassHierarchyCheck:
7121     case TypeCheckKind::kArrayObjectCheck:
7122     case TypeCheckKind::kInterfaceCheck: {
7123       bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7124       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7125       baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7126                                      (type_check_kind != TypeCheckKind::kInterfaceCheck);
7127       break;
7128     }
7129     case TypeCheckKind::kArrayCheck:
7130     case TypeCheckKind::kUnresolvedCheck:
7131       call_kind = LocationSummary::kCallOnSlowPath;
7132       break;
7133     case TypeCheckKind::kBitstringCheck:
7134       break;
7135   }
7136 
7137   LocationSummary* locations =
7138       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7139   if (baker_read_barrier_slow_path) {
7140     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7141   }
7142   locations->SetInAt(0, Location::RequiresRegister());
7143   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7144     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7145     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7146     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7147   } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7148     locations->SetInAt(1, Location::RequiresRegister());
7149   } else {
7150     locations->SetInAt(1, Location::Any());
7151   }
7152   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7153   locations->SetOut(Location::RequiresRegister());
7154   locations->AddRegisterTemps(
7155       NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7156 }
7157 
7158 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7159   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7160   LocationSummary* locations = instruction->GetLocations();
7161   Location obj_loc = locations->InAt(0);
7162   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7163   Location cls = locations->InAt(1);
7164   Location out_loc = locations->Out();
7165   CpuRegister out = out_loc.AsRegister<CpuRegister>();
7166   const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7167   DCHECK_LE(num_temps, 1u);
7168   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7169   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7170   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7171   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7172   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7173   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7174   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7175   const uint32_t object_array_data_offset =
7176       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7177   SlowPathCode* slow_path = nullptr;
7178   NearLabel done, zero;
7179 
7180   // Return 0 if `obj` is null.
7181   // Avoid null check if we know obj is not null.
7182   if (instruction->MustDoNullCheck()) {
7183     __ testl(obj, obj);
7184     __ j(kEqual, &zero);
7185   }
7186 
7187   switch (type_check_kind) {
7188     case TypeCheckKind::kExactCheck: {
7189       ReadBarrierOption read_barrier_option =
7190           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7191       // /* HeapReference<Class> */ out = obj->klass_
7192       GenerateReferenceLoadTwoRegisters(instruction,
7193                                         out_loc,
7194                                         obj_loc,
7195                                         class_offset,
7196                                         read_barrier_option);
7197       if (cls.IsRegister()) {
7198         __ cmpl(out, cls.AsRegister<CpuRegister>());
7199       } else {
7200         DCHECK(cls.IsStackSlot()) << cls;
7201         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7202       }
7203       if (zero.IsLinked()) {
7204         // Classes must be equal for the instanceof to succeed.
7205         __ j(kNotEqual, &zero);
7206         __ movl(out, Immediate(1));
7207         __ jmp(&done);
7208       } else {
7209         __ setcc(kEqual, out);
7210         // setcc only sets the low byte.
7211         __ andl(out, Immediate(1));
7212       }
7213       break;
7214     }
7215 
7216     case TypeCheckKind::kAbstractClassCheck: {
7217       ReadBarrierOption read_barrier_option =
7218           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7219       // /* HeapReference<Class> */ out = obj->klass_
7220       GenerateReferenceLoadTwoRegisters(instruction,
7221                                         out_loc,
7222                                         obj_loc,
7223                                         class_offset,
7224                                         read_barrier_option);
7225       // If the class is abstract, we eagerly fetch the super class of the
7226       // object to avoid doing a comparison we know will fail.
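      // In pseudo-code, the loop emitted below is roughly:
      //   do {
      //     out = out->super_class_;  // A null super class means "not an instance" (result 0).
      //   } while (out != cls);
      //   out = 1;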
7227       NearLabel loop, success;
7228       __ Bind(&loop);
7229       // /* HeapReference<Class> */ out = out->super_class_
7230       GenerateReferenceLoadOneRegister(instruction,
7231                                        out_loc,
7232                                        super_offset,
7233                                        maybe_temp_loc,
7234                                        read_barrier_option);
7235       __ testl(out, out);
7236       // If `out` is null, we use it for the result, and jump to `done`.
7237       __ j(kEqual, &done);
7238       if (cls.IsRegister()) {
7239         __ cmpl(out, cls.AsRegister<CpuRegister>());
7240       } else {
7241         DCHECK(cls.IsStackSlot()) << cls;
7242         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7243       }
7244       __ j(kNotEqual, &loop);
7245       __ movl(out, Immediate(1));
7246       if (zero.IsLinked()) {
7247         __ jmp(&done);
7248       }
7249       break;
7250     }
7251 
7252     case TypeCheckKind::kClassHierarchyCheck: {
7253       ReadBarrierOption read_barrier_option =
7254           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7255       // /* HeapReference<Class> */ out = obj->klass_
7256       GenerateReferenceLoadTwoRegisters(instruction,
7257                                         out_loc,
7258                                         obj_loc,
7259                                         class_offset,
7260                                         read_barrier_option);
7261       // Walk over the class hierarchy to find a match.
7262       NearLabel loop, success;
7263       __ Bind(&loop);
7264       if (cls.IsRegister()) {
7265         __ cmpl(out, cls.AsRegister<CpuRegister>());
7266       } else {
7267         DCHECK(cls.IsStackSlot()) << cls;
7268         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7269       }
7270       __ j(kEqual, &success);
7271       // /* HeapReference<Class> */ out = out->super_class_
7272       GenerateReferenceLoadOneRegister(instruction,
7273                                        out_loc,
7274                                        super_offset,
7275                                        maybe_temp_loc,
7276                                        read_barrier_option);
7277       __ testl(out, out);
7278       __ j(kNotEqual, &loop);
7279       // If `out` is null, we use it for the result, and jump to `done`.
7280       __ jmp(&done);
7281       __ Bind(&success);
7282       __ movl(out, Immediate(1));
7283       if (zero.IsLinked()) {
7284         __ jmp(&done);
7285       }
7286       break;
7287     }
7288 
7289     case TypeCheckKind::kArrayObjectCheck: {
7290       ReadBarrierOption read_barrier_option =
7291           codegen_->ReadBarrierOptionForInstanceOf(instruction);
7292       // /* HeapReference<Class> */ out = obj->klass_
7293       GenerateReferenceLoadTwoRegisters(instruction,
7294                                         out_loc,
7295                                         obj_loc,
7296                                         class_offset,
7297                                         read_barrier_option);
7298       // Do an exact check.
7299       NearLabel exact_check;
7300       if (cls.IsRegister()) {
7301         __ cmpl(out, cls.AsRegister<CpuRegister>());
7302       } else {
7303         DCHECK(cls.IsStackSlot()) << cls;
7304         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7305       }
7306       __ j(kEqual, &exact_check);
7307       // Otherwise, we need to check that the object's class is a non-primitive array.
7308       // /* HeapReference<Class> */ out = out->component_type_
7309       GenerateReferenceLoadOneRegister(instruction,
7310                                        out_loc,
7311                                        component_offset,
7312                                        maybe_temp_loc,
7313                                        read_barrier_option);
7314       __ testl(out, out);
7315       // If `out` is null, we use it for the result, and jump to `done`.
7316       __ j(kEqual, &done);
7317       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7318       __ j(kNotEqual, &zero);
7319       __ Bind(&exact_check);
7320       __ movl(out, Immediate(1));
7321       __ jmp(&done);
7322       break;
7323     }
7324 
7325     case TypeCheckKind::kArrayCheck: {
7326       // No read barrier since the slow path will retry upon failure.
7327       // /* HeapReference<Class> */ out = obj->klass_
7328       GenerateReferenceLoadTwoRegisters(instruction,
7329                                         out_loc,
7330                                         obj_loc,
7331                                         class_offset,
7332                                         kWithoutReadBarrier);
7333       if (cls.IsRegister()) {
7334         __ cmpl(out, cls.AsRegister<CpuRegister>());
7335       } else {
7336         DCHECK(cls.IsStackSlot()) << cls;
7337         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7338       }
7339       DCHECK(locations->OnlyCallsOnSlowPath());
7340       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7341           instruction, /* is_fatal= */ false);
7342       codegen_->AddSlowPath(slow_path);
7343       __ j(kNotEqual, slow_path->GetEntryLabel());
7344       __ movl(out, Immediate(1));
7345       if (zero.IsLinked()) {
7346         __ jmp(&done);
7347       }
7348       break;
7349     }
7350 
7351     case TypeCheckKind::kInterfaceCheck: {
7352       if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7353         DCHECK(locations->OnlyCallsOnSlowPath());
7354         slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7355             instruction, /* is_fatal= */ false);
7356         codegen_->AddSlowPath(slow_path);
7357         if (codegen_->EmitNonBakerReadBarrier()) {
7358           __ jmp(slow_path->GetEntryLabel());
7359           break;
7360         }
7361         // For Baker read barrier, take the slow path while marking.
7362         __ gs()->cmpl(
7363             Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7364             Immediate(0));
7365         __ j(kNotEqual, slow_path->GetEntryLabel());
7366       }
7367 
7368       // Fast-path without read barriers.
7369       CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7370       // /* HeapReference<Class> */ temp = obj->klass_
7371       __ movl(temp, Address(obj, class_offset));
7372       __ MaybeUnpoisonHeapReference(temp);
7373       // /* HeapReference<Class> */ temp = temp->iftable_
7374       __ movl(temp, Address(temp, iftable_offset));
7375       __ MaybeUnpoisonHeapReference(temp);
7376       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7377       __ movl(out, Address(temp, array_length_offset));
7378       // Maybe poison the `cls` for direct comparison with memory.
7379       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7380       // Loop through the iftable and check if any class matches.
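      // Each IfTable entry is a pair of references (interface class, method array), so the
      // reference count in `out` is decremented by 2 per entry; heap references are 32-bit,
      // hence the TIMES_4 scaling of the index below.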
7381       NearLabel loop, end;
7382       __ Bind(&loop);
7383       // Check if we still have an entry to compare.
7384       __ subl(out, Immediate(2));
7385       __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7386       // Go to next interface if the classes do not match.
7387       __ cmpl(cls.AsRegister<CpuRegister>(),
7388               CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7389       __ j(kNotEqual, &loop);
7390       if (zero.IsLinked()) {
7391         __ movl(out, Immediate(1));
7392         // If `cls` was poisoned above, unpoison it.
7393         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7394         __ jmp(&done);
7395         if (kPoisonHeapReferences) {
7396           // The false case needs to unpoison the class before jumping to `zero`.
7397           __ Bind(&end);
7398           __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7399           __ jmp(&zero);
7400         }
7401       } else {
7402         // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
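        // The match case falls through and sets `out` to -1, so the add of 2 below yields 1;
        // the no-match case reaches `end` with `out` == -2, so the same add yields 0.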
7403         __ movl(out, Immediate(-1));
7404         __ Bind(&end);
7405         __ addl(out, Immediate(2));
7406         // If `cls` was poisoned above, unpoison it.
7407         __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7408       }
7409       break;
7410     }
7411 
7412     case TypeCheckKind::kUnresolvedCheck: {
7413       // Note that we indeed only call on slow path, but we always go
7414       // into the slow path for the unresolved check case.
7415       //
7416       // We cannot directly call the InstanceofNonTrivial runtime
7417       // entry point without resorting to a type checking slow path
7418       // here (i.e. by calling InvokeRuntime directly), as it would
7419       // require to assign fixed registers for the inputs of this
7420       // HInstanceOf instruction (following the runtime calling
7421       // convention), which might be cluttered by the potential first
7422       // read barrier emission at the beginning of this method.
7423       //
7424       // TODO: Introduce a new runtime entry point taking the object
7425       // to test (instead of its class) as argument, and let it deal
7426       // with the read barrier issues. This will let us refactor this
7427       // case of the `switch` code as it was previously (with a direct
7428       // call to the runtime not using a type checking slow path).
7429       // This should also be beneficial for the other cases above.
7430       DCHECK(locations->OnlyCallsOnSlowPath());
7431       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7432           instruction, /* is_fatal= */ false);
7433       codegen_->AddSlowPath(slow_path);
7434       __ jmp(slow_path->GetEntryLabel());
7435       break;
7436     }
7437 
7438     case TypeCheckKind::kBitstringCheck: {
7439       // /* HeapReference<Class> */ out = obj->klass_
7440       GenerateReferenceLoadTwoRegisters(instruction,
7441                                         out_loc,
7442                                         obj_loc,
7443                                         class_offset,
7444                                         kWithoutReadBarrier);
7445 
7446       GenerateBitstringTypeCheckCompare(instruction, out);
7447       if (zero.IsLinked()) {
7448         __ j(kNotEqual, &zero);
7449         __ movl(out, Immediate(1));
7450         __ jmp(&done);
7451       } else {
7452         __ setcc(kEqual, out);
7453         // setcc only sets the low byte.
7454         __ andl(out, Immediate(1));
7455       }
7456       break;
7457     }
7458   }
7459 
7460   if (zero.IsLinked()) {
7461     __ Bind(&zero);
7462     __ xorl(out, out);
7463   }
7464 
7465   if (done.IsLinked()) {
7466     __ Bind(&done);
7467   }
7468 
7469   if (slow_path != nullptr) {
7470     __ Bind(slow_path->GetExitLabel());
7471   }
7472 }
7473 
7474 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7475   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7476   LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7477   LocationSummary* locations =
7478       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7479   locations->SetInAt(0, Location::RequiresRegister());
7480   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7481     // Require a register for the interface check since there is a loop that compares the class to
7482     // a memory address.
7483     locations->SetInAt(1, Location::RequiresRegister());
7484   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7485     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7486     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7487     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7488   } else {
7489     locations->SetInAt(1, Location::Any());
7490   }
7491   locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7492 }
7493 
7494 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7495   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7496   LocationSummary* locations = instruction->GetLocations();
7497   Location obj_loc = locations->InAt(0);
7498   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7499   Location cls = locations->InAt(1);
7500   Location temp_loc = locations->GetTemp(0);
7501   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7502   const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7503   DCHECK_GE(num_temps, 1u);
7504   DCHECK_LE(num_temps, 2u);
7505   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7506   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7507   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7508   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7509   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7510   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7511   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7512   const uint32_t object_array_data_offset =
7513       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7514 
7515   bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7516   SlowPathCode* type_check_slow_path =
7517       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7518           instruction, is_type_check_slow_path_fatal);
7519   codegen_->AddSlowPath(type_check_slow_path);
7520 
7521 
7522   NearLabel done;
7523   // Avoid null check if we know obj is not null.
7524   if (instruction->MustDoNullCheck()) {
7525     __ testl(obj, obj);
7526     __ j(kEqual, &done);
7527   }
7528 
7529   switch (type_check_kind) {
7530     case TypeCheckKind::kExactCheck:
7531     case TypeCheckKind::kArrayCheck: {
7532       // /* HeapReference<Class> */ temp = obj->klass_
7533       GenerateReferenceLoadTwoRegisters(instruction,
7534                                         temp_loc,
7535                                         obj_loc,
7536                                         class_offset,
7537                                         kWithoutReadBarrier);
7538       if (cls.IsRegister()) {
7539         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7540       } else {
7541         DCHECK(cls.IsStackSlot()) << cls;
7542         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7543       }
7544       // Jump to slow path for throwing the exception or doing a
7545       // more involved array check.
7546       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7547       break;
7548     }
7549 
7550     case TypeCheckKind::kAbstractClassCheck: {
7551       // /* HeapReference<Class> */ temp = obj->klass_
7552       GenerateReferenceLoadTwoRegisters(instruction,
7553                                         temp_loc,
7554                                         obj_loc,
7555                                         class_offset,
7556                                         kWithoutReadBarrier);
7557       // If the class is abstract, we eagerly fetch the super class of the
7558       // object to avoid doing a comparison we know will fail.
7559       NearLabel loop;
7560       __ Bind(&loop);
7561       // /* HeapReference<Class> */ temp = temp->super_class_
7562       GenerateReferenceLoadOneRegister(instruction,
7563                                        temp_loc,
7564                                        super_offset,
7565                                        maybe_temp2_loc,
7566                                        kWithoutReadBarrier);
7567 
7568       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7569       // exception.
7570       __ testl(temp, temp);
7571       // Otherwise, compare the classes.
7572       __ j(kZero, type_check_slow_path->GetEntryLabel());
7573       if (cls.IsRegister()) {
7574         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7575       } else {
7576         DCHECK(cls.IsStackSlot()) << cls;
7577         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7578       }
7579       __ j(kNotEqual, &loop);
7580       break;
7581     }
7582 
7583     case TypeCheckKind::kClassHierarchyCheck: {
7584       // /* HeapReference<Class> */ temp = obj->klass_
7585       GenerateReferenceLoadTwoRegisters(instruction,
7586                                         temp_loc,
7587                                         obj_loc,
7588                                         class_offset,
7589                                         kWithoutReadBarrier);
7590       // Walk over the class hierarchy to find a match.
7591       NearLabel loop;
7592       __ Bind(&loop);
7593       if (cls.IsRegister()) {
7594         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7595       } else {
7596         DCHECK(cls.IsStackSlot()) << cls;
7597         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7598       }
7599       __ j(kEqual, &done);
7600 
7601       // /* HeapReference<Class> */ temp = temp->super_class_
7602       GenerateReferenceLoadOneRegister(instruction,
7603                                        temp_loc,
7604                                        super_offset,
7605                                        maybe_temp2_loc,
7606                                        kWithoutReadBarrier);
7607 
7608       // If the class reference currently in `temp` is not null, jump
7609       // back at the beginning of the loop.
7610       __ testl(temp, temp);
7611       __ j(kNotZero, &loop);
7612       // Otherwise, jump to the slow path to throw the exception.
7613       __ jmp(type_check_slow_path->GetEntryLabel());
7614       break;
7615     }
7616 
7617     case TypeCheckKind::kArrayObjectCheck: {
7618       // /* HeapReference<Class> */ temp = obj->klass_
7619       GenerateReferenceLoadTwoRegisters(instruction,
7620                                         temp_loc,
7621                                         obj_loc,
7622                                         class_offset,
7623                                         kWithoutReadBarrier);
7624       // Do an exact check.
7625       NearLabel check_non_primitive_component_type;
7626       if (cls.IsRegister()) {
7627         __ cmpl(temp, cls.AsRegister<CpuRegister>());
7628       } else {
7629         DCHECK(cls.IsStackSlot()) << cls;
7630         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7631       }
7632       __ j(kEqual, &done);
7633 
7634       // Otherwise, we need to check that the object's class is a non-primitive array.
7635       // /* HeapReference<Class> */ temp = temp->component_type_
7636       GenerateReferenceLoadOneRegister(instruction,
7637                                        temp_loc,
7638                                        component_offset,
7639                                        maybe_temp2_loc,
7640                                        kWithoutReadBarrier);
7641 
7642       // If the component type is not null (i.e. the object is indeed
7643       // an array), fall through to further check that this component
7644       // type is not a primitive type.
7645       // If the component type is null, the object is not an array.
7646       __ testl(temp, temp);
7647       // In that case, jump to the slow path to throw the exception.
7648       __ j(kZero, type_check_slow_path->GetEntryLabel());
7649       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7650       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7651       break;
7652     }
7653 
7654     case TypeCheckKind::kUnresolvedCheck: {
7655       // We always go into the type check slow path for the unresolved case.
7656       //
7657       // We cannot directly call the CheckCast runtime entry point
7658       // without resorting to a type checking slow path here (i.e. by
7659       // calling InvokeRuntime directly), as it would require to
7660       // assign fixed registers for the inputs of this HInstanceOf
7661       // instruction (following the runtime calling convention), which
7662       // might be cluttered by the potential first read barrier
7663       // emission at the beginning of this method.
7664       __ jmp(type_check_slow_path->GetEntryLabel());
7665       break;
7666     }
7667 
7668     case TypeCheckKind::kInterfaceCheck: {
7669       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7670       // We cannot get false positives by doing this.
7671       // /* HeapReference<Class> */ temp = obj->klass_
7672       GenerateReferenceLoadTwoRegisters(instruction,
7673                                         temp_loc,
7674                                         obj_loc,
7675                                         class_offset,
7676                                         kWithoutReadBarrier);
7677 
7678       // /* HeapReference<Class> */ temp = temp->iftable_
7679       GenerateReferenceLoadOneRegister(instruction,
7680                                        temp_loc,
7681                                        iftable_offset,
7682                                        maybe_temp2_loc,
7683                                        kWithoutReadBarrier);
7684       // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7685       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7686       // Maybe poison the `cls` for direct comparison with memory.
7687       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7688       // Loop through the iftable and check if any class matches.
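      // As in the VisitInstanceOf interface check above, each IfTable entry is a pair of
      // references (interface class, method array), hence the step of 2 per iteration.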
7689       NearLabel start_loop;
7690       __ Bind(&start_loop);
7691       // Check if we still have an entry to compare.
7692       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7693       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7694       // Go to next interface if the classes do not match.
7695       __ cmpl(cls.AsRegister<CpuRegister>(),
7696               CodeGeneratorX86_64::ArrayAddress(temp,
7697                                                 maybe_temp2_loc,
7698                                                 TIMES_4,
7699                                                 object_array_data_offset));
7700       __ j(kNotEqual, &start_loop);  // Fall through if the classes match (the cast succeeds).
7701       // If `cls` was poisoned above, unpoison it.
7702       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7703       break;
7704     }
7705 
7706     case TypeCheckKind::kBitstringCheck: {
7707       // /* HeapReference<Class> */ temp = obj->klass_
7708       GenerateReferenceLoadTwoRegisters(instruction,
7709                                         temp_loc,
7710                                         obj_loc,
7711                                         class_offset,
7712                                         kWithoutReadBarrier);
7713 
7714       GenerateBitstringTypeCheckCompare(instruction, temp);
7715       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7716       break;
7717     }
7718   }
7719 
7720   if (done.IsLinked()) {
7721     __ Bind(&done);
7722   }
7723 
7724   __ Bind(type_check_slow_path->GetExitLabel());
7725 }
7726 
7727 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7728   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7729       instruction, LocationSummary::kCallOnMainOnly);
7730   InvokeRuntimeCallingConvention calling_convention;
7731   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7732 }
7733 
7734 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7735   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7736                           instruction);
7737   if (instruction->IsEnter()) {
7738     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7739   } else {
7740     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7741   }
7742 }
7743 
7744 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7745   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7746   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7747   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7748   locations->SetInAt(0, Location::RequiresRegister());
7749   // There is no immediate variant of negated bitwise and in X86.
7750   locations->SetInAt(1, Location::RequiresRegister());
7751   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7752 }
7753 
7754 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7755   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7756   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7757   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7758   locations->SetInAt(0, Location::RequiresRegister());
7759   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7760 }
7761 
7762 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7763   LocationSummary* locations = instruction->GetLocations();
7764   Location first = locations->InAt(0);
7765   Location second = locations->InAt(1);
7766   Location dest = locations->Out();
7767   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7768 }
7769 
7770 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7771   LocationSummary* locations = instruction->GetLocations();
7772   Location src = locations->InAt(0);
7773   Location dest = locations->Out();
7774   switch (instruction->GetOpKind()) {
7775     case HInstruction::kAnd:
7776       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7777       break;
7778     case HInstruction::kXor:
7779       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7780       break;
7781     default:
7782       LOG(FATAL) << "Unreachable";
7783   }
7784 }
7785 
7786 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7787 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7788 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7789 
7790 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7791   LocationSummary* locations =
7792       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7793   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7794          || instruction->GetResultType() == DataType::Type::kInt64);
7795   locations->SetInAt(0, Location::RequiresRegister());
7796   locations->SetInAt(1, Location::Any());
7797   locations->SetOut(Location::SameAsFirstInput());
7798 }
7799 
7800 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7801   HandleBitwiseOperation(instruction);
7802 }
7803 
7804 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7805   HandleBitwiseOperation(instruction);
7806 }
7807 
7808 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7809   HandleBitwiseOperation(instruction);
7810 }
7811 
7812 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7813   LocationSummary* locations = instruction->GetLocations();
7814   Location first = locations->InAt(0);
7815   Location second = locations->InAt(1);
7816   DCHECK(first.Equals(locations->Out()));
7817 
7818   if (instruction->GetResultType() == DataType::Type::kInt32) {
7819     if (second.IsRegister()) {
7820       if (instruction->IsAnd()) {
7821         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7822       } else if (instruction->IsOr()) {
7823         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7824       } else {
7825         DCHECK(instruction->IsXor());
7826         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7827       }
7828     } else if (second.IsConstant()) {
7829       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7830       if (instruction->IsAnd()) {
7831         __ andl(first.AsRegister<CpuRegister>(), imm);
7832       } else if (instruction->IsOr()) {
7833         __ orl(first.AsRegister<CpuRegister>(), imm);
7834       } else {
7835         DCHECK(instruction->IsXor());
7836         __ xorl(first.AsRegister<CpuRegister>(), imm);
7837       }
7838     } else {
7839       Address address(CpuRegister(RSP), second.GetStackIndex());
7840       if (instruction->IsAnd()) {
7841         __ andl(first.AsRegister<CpuRegister>(), address);
7842       } else if (instruction->IsOr()) {
7843         __ orl(first.AsRegister<CpuRegister>(), address);
7844       } else {
7845         DCHECK(instruction->IsXor());
7846         __ xorl(first.AsRegister<CpuRegister>(), address);
7847       }
7848     }
7849   } else {
7850     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7851     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7852     bool second_is_constant = false;
7853     int64_t value = 0;
7854     if (second.IsConstant()) {
7855       second_is_constant = true;
7856       value = second.GetConstant()->AsLongConstant()->GetValue();
7857     }
7858     bool is_int32_value = IsInt<32>(value);
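    // x86-64 ALU instructions take at most a 32-bit immediate (sign-extended to 64 bits),
    // so wider constants are read from the constant area via LiteralInt64Address() below.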
7859 
7860     if (instruction->IsAnd()) {
7861       if (second_is_constant) {
7862         if (is_int32_value) {
7863           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7864         } else {
7865           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7866         }
7867       } else if (second.IsDoubleStackSlot()) {
7868         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7869       } else {
7870         __ andq(first_reg, second.AsRegister<CpuRegister>());
7871       }
7872     } else if (instruction->IsOr()) {
7873       if (second_is_constant) {
7874         if (is_int32_value) {
7875           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7876         } else {
7877           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7878         }
7879       } else if (second.IsDoubleStackSlot()) {
7880         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7881       } else {
7882         __ orq(first_reg, second.AsRegister<CpuRegister>());
7883       }
7884     } else {
7885       DCHECK(instruction->IsXor());
7886       if (second_is_constant) {
7887         if (is_int32_value) {
7888           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7889         } else {
7890           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7891         }
7892       } else if (second.IsDoubleStackSlot()) {
7893         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7894       } else {
7895         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7896       }
7897     }
7898   }
7899 }
7900 
7901 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7902     HInstruction* instruction,
7903     Location out,
7904     uint32_t offset,
7905     Location maybe_temp,
7906     ReadBarrierOption read_barrier_option) {
7907   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7908   if (read_barrier_option == kWithReadBarrier) {
7909     DCHECK(codegen_->EmitReadBarrier());
7910     if (kUseBakerReadBarrier) {
7911       // Load with fast path based Baker's read barrier.
7912       // /* HeapReference<Object> */ out = *(out + offset)
7913       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7914           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7915     } else {
7916       // Load with slow path based read barrier.
7917       // Save the value of `out` into `maybe_temp` before overwriting it
7918       // in the following move operation, as we will need it for the
7919       // read barrier below.
7920       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7921       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7922       // /* HeapReference<Object> */ out = *(out + offset)
7923       __ movl(out_reg, Address(out_reg, offset));
7924       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7925     }
7926   } else {
7927     // Plain load with no read barrier.
7928     // /* HeapReference<Object> */ out = *(out + offset)
7929     __ movl(out_reg, Address(out_reg, offset));
7930     __ MaybeUnpoisonHeapReference(out_reg);
7931   }
7932 }
7933 
7934 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7935     HInstruction* instruction,
7936     Location out,
7937     Location obj,
7938     uint32_t offset,
7939     ReadBarrierOption read_barrier_option) {
7940   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7941   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7942   if (read_barrier_option == kWithReadBarrier) {
7943     DCHECK(codegen_->EmitReadBarrier());
7944     if (kUseBakerReadBarrier) {
7945       // Load with fast path based Baker's read barrier.
7946       // /* HeapReference<Object> */ out = *(obj + offset)
7947       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7948           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7949     } else {
7950       // Load with slow path based read barrier.
7951       // /* HeapReference<Object> */ out = *(obj + offset)
7952       __ movl(out_reg, Address(obj_reg, offset));
7953       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7954     }
7955   } else {
7956     // Plain load with no read barrier.
7957     // /* HeapReference<Object> */ out = *(obj + offset)
7958     __ movl(out_reg, Address(obj_reg, offset));
7959     __ MaybeUnpoisonHeapReference(out_reg);
7960   }
7961 }
7962 
7963 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7964     HInstruction* instruction,
7965     Location root,
7966     const Address& address,
7967     Label* fixup_label,
7968     ReadBarrierOption read_barrier_option) {
7969   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7970   if (read_barrier_option == kWithReadBarrier) {
7971     DCHECK(codegen_->EmitReadBarrier());
7972     if (kUseBakerReadBarrier) {
7973       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7974       // Baker's read barrier are used:
7975       //
7976       //   root = obj.field;
7977       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7978       //   if (temp != null) {
7979       //     root = temp(root)
7980       //   }
7981 
7982       // /* GcRoot<mirror::Object> */ root = *address
7983       __ movl(root_reg, address);
7984       if (fixup_label != nullptr) {
7985         __ Bind(fixup_label);
7986       }
7987       static_assert(
7988           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7989           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7990           "have different sizes.");
7991       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7992                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7993                     "have different sizes.");
7994 
7995       // Slow path marking the GC root `root`.
7996       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7997           instruction, root, /* unpoison_ref_before_marking= */ false);
7998       codegen_->AddSlowPath(slow_path);
7999 
8000       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
8001       const int32_t entry_point_offset =
8002           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
8003       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
8004       // The entrypoint is null when the GC is not marking.
8005       __ j(kNotEqual, slow_path->GetEntryLabel());
8006       __ Bind(slow_path->GetExitLabel());
8007     } else {
8008       // GC root loaded through a slow path for read barriers other
8009       // than Baker's.
8010       // /* GcRoot<mirror::Object>* */ root = address
8011       __ leaq(root_reg, address);
8012       if (fixup_label != nullptr) {
8013         __ Bind(fixup_label);
8014       }
8015       // /* mirror::Object* */ root = root->Read()
8016       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8017     }
8018   } else {
8019     // Plain GC root load with no read barrier.
8020     // /* GcRoot<mirror::Object> */ root = *address
8021     __ movl(root_reg, address);
8022     if (fixup_label != nullptr) {
8023       __ Bind(fixup_label);
8024     }
8025     // Note that GC roots are not affected by heap poisoning, thus we
8026     // do not have to unpoison `root_reg` here.
8027   }
8028 }
8029 
8030 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8031                                                                 Location ref,
8032                                                                 CpuRegister obj,
8033                                                                 uint32_t offset,
8034                                                                 bool needs_null_check) {
8035   DCHECK(EmitBakerReadBarrier());
8036 
8037   // /* HeapReference<Object> */ ref = *(obj + offset)
8038   Address src(obj, offset);
8039   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8040 }
8041 
8042 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8043                                                                 Location ref,
8044                                                                 CpuRegister obj,
8045                                                                 uint32_t data_offset,
8046                                                                 Location index,
8047                                                                 bool needs_null_check) {
8048   DCHECK(EmitBakerReadBarrier());
8049 
8050   static_assert(
8051       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8052       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8053   // /* HeapReference<Object> */ ref =
8054   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8055   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
8056   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8057 }
8058 
8059 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8060                                                                     Location ref,
8061                                                                     CpuRegister obj,
8062                                                                     const Address& src,
8063                                                                     bool needs_null_check,
8064                                                                     bool always_update_field,
8065                                                                     CpuRegister* temp1,
8066                                                                     CpuRegister* temp2) {
8067   DCHECK(EmitBakerReadBarrier());
8068 
8069   // In slow path based read barriers, the read barrier call is
8070   // inserted after the original load. However, in fast path based
8071   // Baker's read barriers, we need to perform the load of
8072   // mirror::Object::monitor_ *before* the original reference load.
8073   // This load-load ordering is required by the read barrier.
8074   // The fast path/slow path (for Baker's algorithm) should look like:
8075   //
8076   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8077   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8078   //   HeapReference<Object> ref = *src;  // Original reference load.
8079   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8080   //   if (is_gray) {
8081   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8082   //   }
8083   //
8084   // Note: the original implementation in ReadBarrier::Barrier is
8085   // slightly more complex as:
8086   // - it implements the load-load fence using a data dependency on
8087   //   the high-bits of rb_state, which are expected to be all zeroes
8088   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8089   //   here, which is a no-op thanks to the x86-64 memory model);
8090   // - it performs additional checks that we do not do here for
8091   //   performance reasons.
8092 
8093   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8094   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8095 
8096   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8097   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8098   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8099   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8100   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8101   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8102 
8103   // if (rb_state == ReadBarrier::GrayState())
8104   //   ref = ReadBarrier::Mark(ref);
8105   // At this point, just do the "if" and make sure that flags are preserved until the branch.
8106   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8107   if (needs_null_check) {
8108     MaybeRecordImplicitNullCheck(instruction);
8109   }
8110 
8111   // Load fence to prevent load-load reordering.
8112   // Note that this is a no-op, thanks to the x86-64 memory model.
8113   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8114 
8115   // The actual reference load.
8116   // /* HeapReference<Object> */ ref = *src
8117   __ movl(ref_reg, src);  // Flags are unaffected.
8118 
8119   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8120   // Slow path marking the object `ref` when it is gray.
8121   SlowPathCode* slow_path;
8122   if (always_update_field) {
8123     DCHECK(temp1 != nullptr);
8124     DCHECK(temp2 != nullptr);
8125     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8126         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8127   } else {
8128     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8129         instruction, ref, /* unpoison_ref_before_marking= */ true);
8130   }
8131   AddSlowPath(slow_path);
8132 
8133   // We have done the "if" of the gray bit check above, now branch based on the flags.
8134   __ j(kNotZero, slow_path->GetEntryLabel());
8135 
8136   // Object* ref = ref_addr->AsMirrorPtr()
8137   __ MaybeUnpoisonHeapReference(ref_reg);
8138 
8139   __ Bind(slow_path->GetExitLabel());
8140 }
8141 
8142 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
8143                                                   Location out,
8144                                                   Location ref,
8145                                                   Location obj,
8146                                                   uint32_t offset,
8147                                                   Location index) {
8148   DCHECK(EmitReadBarrier());
8149 
8150   // Insert a slow path based read barrier *after* the reference load.
8151   //
8152   // If heap poisoning is enabled, the unpoisoning of the loaded
8153   // reference will be carried out by the runtime within the slow
8154   // path.
8155   //
8156   // Note that `ref` currently does not get unpoisoned (when heap
8157   // poisoning is enabled), which is alright as the `ref` argument is
8158   // not used by the artReadBarrierSlow entry point.
8159   //
8160   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8161   SlowPathCode* slow_path = new (GetScopedAllocator())
8162       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8163   AddSlowPath(slow_path);
8164 
8165   __ jmp(slow_path->GetEntryLabel());
8166   __ Bind(slow_path->GetExitLabel());
8167 }
8168 
8169 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8170                                                        Location out,
8171                                                        Location ref,
8172                                                        Location obj,
8173                                                        uint32_t offset,
8174                                                        Location index) {
8175   if (EmitReadBarrier()) {
8176     // Baker's read barriers shall be handled by the fast path
8177     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8178     DCHECK(!kUseBakerReadBarrier);
8179     // If heap poisoning is enabled, unpoisoning will be taken care of
8180     // by the runtime within the slow path.
8181     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8182   } else if (kPoisonHeapReferences) {
8183     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8184   }
8185 }
8186 
8187 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8188                                                          Location out,
8189                                                          Location root) {
8190   DCHECK(EmitReadBarrier());
8191 
8192   // Insert a slow path based read barrier *after* the GC root load.
8193   //
8194   // Note that GC roots are not affected by heap poisoning, so we do
8195   // not need to do anything special for this here.
8196   SlowPathCode* slow_path =
8197       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8198   AddSlowPath(slow_path);
8199 
8200   __ jmp(slow_path->GetEntryLabel());
8201   __ Bind(slow_path->GetExitLabel());
8202 }
8203 
8204 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8205   // Nothing to do, this should be removed during prepare for register allocator.
8206   LOG(FATAL) << "Unreachable";
8207 }
8208 
8209 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8210   // Nothing to do, this should be removed during prepare for register allocator.
8211   LOG(FATAL) << "Unreachable";
8212 }
8213 
8214 // Simple implementation of packed switch - generate cascaded compare/jumps.
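// A rough sketch of the emitted cascade (illustrative only, assuming lower_bound == 0 and
// three entries):
//   cmpl value, #1 ; jb case_0 ; je case_1
//   cmpl value, #2 ; je case_2
//   jmp default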
8215 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8216   LocationSummary* locations =
8217       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8218   locations->SetInAt(0, Location::RequiresRegister());
8219   locations->AddRegisterTemps(2);
8220 }
8221 
8222 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8223   int32_t lower_bound = switch_instr->GetStartValue();
8224   uint32_t num_entries = switch_instr->GetNumEntries();
8225   LocationSummary* locations = switch_instr->GetLocations();
8226   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8227   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8228   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8229   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8230 
8231   // Should we generate smaller inline compare/jumps?
8232   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8233     // Figure out the correct compare values and jump conditions.
8234     // Handle the first compare/branch as a special case because it might
8235     // jump to the default case.
8236     DCHECK_GT(num_entries, 2u);
8237     Condition first_condition;
8238     uint32_t index;
8239     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8240     if (lower_bound != 0) {
8241       first_condition = kLess;
8242       __ cmpl(value_reg_in, Immediate(lower_bound));
8243       __ j(first_condition, codegen_->GetLabelOf(default_block));
8244       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8245 
8246       index = 1;
8247     } else {
8248       // Handle all the compare/jumps below.
8249       first_condition = kBelow;
8250       index = 0;
8251     }
8252 
8253     // Handle the rest of the compare/jumps.
8254     for (; index + 1 < num_entries; index += 2) {
8255       int32_t compare_to_value = lower_bound + index + 1;
8256       __ cmpl(value_reg_in, Immediate(compare_to_value));
8257       // Jump to successors[index] if value < case_value[index].
8258       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8259       // Jump to successors[index + 1] if value == case_value[index + 1].
8260       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8261     }
8262 
8263     if (index != num_entries) {
8264       // There is an odd number of entries. Handle the last one.
8265       DCHECK_EQ(index + 1, num_entries);
8266       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8267       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8268     }
8269 
8270     // And the default for any other value.
8271     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8272       __ jmp(codegen_->GetLabelOf(default_block));
8273     }
8274     return;
8275   }
8276 
8277   // Remove the bias, if needed.
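  // `leal` computes (value - lower_bound) into the temp without clobbering the input register.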
8278   Register value_reg_out = value_reg_in.AsRegister();
8279   if (lower_bound != 0) {
8280     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8281     value_reg_out = temp_reg.AsRegister();
8282   }
8283   CpuRegister value_reg(value_reg_out);
8284 
8285   // Is the value in range?
8286   __ cmpl(value_reg, Immediate(num_entries - 1));
8287   __ j(kAbove, codegen_->GetLabelOf(default_block));
8288 
8289   // We are in the range of the table.
8290   // Load the address of the jump table in the constant area.
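  // Each table entry is a 32-bit offset relative to the table base, so the jump target is
  // computed below as base + sign-extended entry.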
8291   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8292 
8293   // Load the (signed) offset from the jump table.
8294   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8295 
8296   // Add the offset to the address of the table base.
8297   __ addq(temp_reg, base_reg);
8298 
8299   // And jump.
8300   __ jmp(temp_reg);
8301 }
8302 
8303 void LocationsBuilderX86_64::VisitIntermediateAddress(
8304     [[maybe_unused]] HIntermediateAddress* instruction) {
8305   LOG(FATAL) << "Unreachable";
8306 }
8307 
8308 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8309     [[maybe_unused]] HIntermediateAddress* instruction) {
8310   LOG(FATAL) << "Unreachable";
8311 }
8312 
8313 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
8314   if (value == 0) {
8315     __ xorl(dest, dest);
8316   } else {
8317     __ movl(dest, Immediate(value));
8318   }
8319 }
8320 
8321 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8322   if (value == 0) {
8323     // Clears upper bits too.
8324     __ xorl(dest, dest);
8325   } else if (IsUint<32>(value)) {
8326     // We can use a 32 bit move, as it will zero-extend and is shorter.
8327     __ movl(dest, Immediate(static_cast<int32_t>(value)));
8328   } else {
8329     __ movq(dest, Immediate(value));
8330   }
8331 }
8332 
8333 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8334   if (value == 0) {
8335     __ xorps(dest, dest);
8336   } else {
8337     __ movss(dest, LiteralInt32Address(value));
8338   }
8339 }
8340 
8341 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8342   if (value == 0) {
8343     __ xorpd(dest, dest);
8344   } else {
8345     __ movsd(dest, LiteralInt64Address(value));
8346   }
8347 }
8348 
8349 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8350   Load32BitValue(dest, bit_cast<int32_t, float>(value));
8351 }
8352 
8353 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8354   Load64BitValue(dest, bit_cast<int64_t, double>(value));
8355 }
8356 
8357 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8358   if (value == 0) {
8359     __ testl(dest, dest);
8360   } else {
8361     __ cmpl(dest, Immediate(value));
8362   }
8363 }
8364 
void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
  if (IsInt<32>(value)) {
    if (value == 0) {
      __ testq(dest, dest);
    } else {
      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
    }
  } else {
    // Value won't fit in a sign-extended 32-bit immediate.
    __ cmpq(dest, LiteralInt64Address(value));
  }
}
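
// Informally: cmpq only takes a sign-extended 32-bit immediate, so
//   Compare64BitValue(reg, 0)          -> testq reg, reg
//   Compare64BitValue(reg, 42)         -> cmpq  reg, 42
//   Compare64BitValue(reg, 1LL << 40)  -> cmpq  reg, [rip + <64-bit literal in the constant area>]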

void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  if (rhs.IsConstant()) {
    int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
    Compare64BitValue(lhs_reg, value);
  } else if (rhs.IsDoubleStackSlot()) {
    __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
  }
}

Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant()
      ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
      : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}
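
// Informal example, assuming the usual 12-byte data offset of an int[]
// (scale TIMES_4, i.e. a shift of 2):
//   constant index 3 -> [obj + (3 << 2) + 12] == [obj + 24]
//   register index   -> [obj + index_reg * 4 + 12]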

void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}
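
// Informally: movq to memory only accepts a sign-extended 32-bit immediate,
// so a value such as 0x123456789 is first materialized in TMP via
// Load64BitValue() and then stored with a register-to-memory movq.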
8426 
8427 /**
8428  * Class to handle late fixup of offsets into constant area.
8429  */
8430 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8431  public:
RIPFixup(CodeGeneratorX86_64 & codegen,size_t offset)8432   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8433       : codegen_(&codegen), offset_into_constant_area_(offset) {}
8434 
8435  protected:
SetOffset(size_t offset)8436   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8437 
8438   CodeGeneratorX86_64* codegen_;
8439 
8440  private:
Process(const MemoryRegion & region,int pos)8441   void Process(const MemoryRegion& region, int pos) override {
8442     // Patch the correct offset for the instruction.  We use the address of the
8443     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8444     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8445     int32_t relative_position = constant_offset - pos;
8446 
8447     // Patch in the right value.
8448     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8449   }
8450 
8451   // Location in constant area that the fixup refers to.
8452   size_t offset_into_constant_area_;
8453 };
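
// Informal example: x86-64 encodes RIP-relative displacements relative to the
// end of the instruction, which is why Process() patches the 4 bytes just
// before 'pos'. If the constant area starts at code offset 0x200, the literal
// sits 0x10 bytes into it, and the instruction ends at offset 0x80, the
// patched displacement is (0x200 + 0x10) - 0x80 = 0x190.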

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the correct values.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};
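
// Informally: CreateJumpTable() appends one int32 per case holding
// (target label position - table position), so that the consumer's
// movsxd/addq pair (see the packed switch lowering above) reconstructs the
// absolute target address from the RIP-relative table base at run time.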

void CodeGeneratorX86_64::Finalize() {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize();
}
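
// Informal note on layout: after Finalize() the method body is
// [instructions][alignment padding][constant literals][jump tables], and
// constant_area_start_ is recorded before the jump tables are appended, so
// RIPFixup::Process() resolves displacements against the final layout.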

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup that is later used both to emit the jump table and to
  // address it RIP-relatively.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so that the table is populated in Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Doesn't fit in a sign-extended 32-bit immediate.  Do it in pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}
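
// Informally: a 64-bit constant store to memory fits in one movq only when
// the value is a sign-extended imm32; otherwise the low and high halves are
// stored with two movl instructions. In both cases only the first store is
// recorded as the implicit null check, since it is the access that can fault
// on a null base object.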

void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}
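
// Informally: a JIT root use is emitted with a 32-bit address literal inside
// an instruction; PatchJitRootUse() overwrites that literal with the address
// of slot index_in_table in the JIT roots table (roots_data). The
// unaligned_uint32_t alias is needed because the patched location need not be
// 4-byte aligned within the code buffer.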

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_method_type_patches_) {
    ProtoReference proto_reference(info.target_dex_file, dex::ProtoIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitMethodTypeRootIndex(proto_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
    [[maybe_unused]] HBitwiseNegatedRight* instruction) {
  LOG(FATAL) << "Unimplemented";
}

void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
    [[maybe_unused]] HBitwiseNegatedRight* instruction) {
  LOG(FATAL) << "Unimplemented";
}

#undef __

}  // namespace x86_64
}  // namespace art