1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "art_method.h"
20 #include "code_generator_utils.h"
21 #include "compiled_method.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "gc/accounting/card_table.h"
24 #include "intrinsics.h"
25 #include "intrinsics_x86_64.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/class-inl.h"
28 #include "mirror/object_reference.h"
29 #include "thread.h"
30 #include "utils/assembler.h"
31 #include "utils/stack_checks.h"
32 #include "utils/x86_64/assembler_x86_64.h"
33 #include "utils/x86_64/managed_register_x86_64.h"
34 
35 namespace art {
36 
37 template<class MirrorType>
38 class GcRoot;
39 
40 namespace x86_64 {
41 
42 static constexpr int kCurrentMethodStackOffset = 0;
43 static constexpr Register kMethodRegisterArgument = RDI;
44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
46 // generates less code/data for a small num_entries.
47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
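// For example, working the numbers from the comment above: at the threshold of
// 5 entries the compare/jump sequence costs roughly 1.5 * 5 ~= 8 instructions,
// while a jump table costs 7 instructions plus 5 embedded literals, so the
// table only starts paying off for larger switches.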
48 
49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51 
52 static constexpr int kC2ConditionMask = 0x400;
53 
54 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
55 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
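// With these macros, a slow-path line such as `__ Bind(GetEntryLabel());` below
// expands to
// `down_cast<X86_64Assembler*>(codegen->GetAssembler())->Bind(GetEntryLabel());`,
// and QUICK_ENTRY_POINT(pThrowNullPointer) yields the Thread-relative offset of
// that quick entrypoint as an int32_t.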
56 
57 class NullCheckSlowPathX86_64 : public SlowPathCode {
58  public:
59   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
60 
61   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
62     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
63     __ Bind(GetEntryLabel());
64     if (instruction_->CanThrowIntoCatchBlock()) {
65       // Live registers will be restored in the catch block if caught.
66       SaveLiveRegisters(codegen, instruction_->GetLocations());
67     }
68     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
69                                   instruction_,
70                                   instruction_->GetDexPc(),
71                                   this);
72     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
73   }
74 
75   bool IsFatal() const OVERRIDE { return true; }
76 
77   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
78 
79  private:
80   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
81 };
82 
83 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
84  public:
85   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
86 
87   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
88     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
89     __ Bind(GetEntryLabel());
90     if (instruction_->CanThrowIntoCatchBlock()) {
91       // Live registers will be restored in the catch block if caught.
92       SaveLiveRegisters(codegen, instruction_->GetLocations());
93     }
94     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
95                                   instruction_,
96                                   instruction_->GetDexPc(),
97                                   this);
98     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
99   }
100 
101   bool IsFatal() const OVERRIDE { return true; }
102 
103   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
104 
105  private:
106   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
107 };
108 
109 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
110  public:
111   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
112       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
113 
114   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
115     __ Bind(GetEntryLabel());
116     if (type_ == Primitive::kPrimInt) {
117       if (is_div_) {
118         __ negl(cpu_reg_);
119       } else {
120         __ xorl(cpu_reg_, cpu_reg_);
121       }
122 
123     } else {
124       DCHECK_EQ(Primitive::kPrimLong, type_);
125       if (is_div_) {
126         __ negq(cpu_reg_);
127       } else {
128         __ xorl(cpu_reg_, cpu_reg_);
129       }
130     }
131     __ jmp(GetExitLabel());
132   }
133 
134   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
135 
136  private:
137   const CpuRegister cpu_reg_;
138   const Primitive::Type type_;
139   const bool is_div_;
140   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
141 };
142 
143 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
144  public:
145   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
146       : SlowPathCode(instruction), successor_(successor) {}
147 
148   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
149     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
150     __ Bind(GetEntryLabel());
151     SaveLiveRegisters(codegen, instruction_->GetLocations());
152     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
153                                   instruction_,
154                                   instruction_->GetDexPc(),
155                                   this);
156     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
157     RestoreLiveRegisters(codegen, instruction_->GetLocations());
158     if (successor_ == nullptr) {
159       __ jmp(GetReturnLabel());
160     } else {
161       __ jmp(x86_64_codegen->GetLabelOf(successor_));
162     }
163   }
164 
165   Label* GetReturnLabel() {
166     DCHECK(successor_ == nullptr);
167     return &return_label_;
168   }
169 
170   HBasicBlock* GetSuccessor() const {
171     return successor_;
172   }
173 
174   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
175 
176  private:
177   HBasicBlock* const successor_;
178   Label return_label_;
179 
180   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
181 };
182 
183 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
184  public:
185   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
186     : SlowPathCode(instruction) {}
187 
188   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
189     LocationSummary* locations = instruction_->GetLocations();
190     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
191     __ Bind(GetEntryLabel());
192     if (instruction_->CanThrowIntoCatchBlock()) {
193       // Live registers will be restored in the catch block if caught.
194       SaveLiveRegisters(codegen, instruction_->GetLocations());
195     }
196     // We're moving two locations to locations that could overlap, so we need a parallel
197     // move resolver.
198     InvokeRuntimeCallingConvention calling_convention;
199     codegen->EmitParallelMoves(
200         locations->InAt(0),
201         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
202         Primitive::kPrimInt,
203         locations->InAt(1),
204         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
205         Primitive::kPrimInt);
206     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
207                                   instruction_,
208                                   instruction_->GetDexPc(),
209                                   this);
210     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
211   }
212 
213   bool IsFatal() const OVERRIDE { return true; }
214 
215   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
216 
217  private:
218   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
219 };
220 
221 class LoadClassSlowPathX86_64 : public SlowPathCode {
222  public:
223   LoadClassSlowPathX86_64(HLoadClass* cls,
224                           HInstruction* at,
225                           uint32_t dex_pc,
226                           bool do_clinit)
227       : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
228     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
229   }
230 
231   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
232     LocationSummary* locations = at_->GetLocations();
233     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
234     __ Bind(GetEntryLabel());
235 
236     SaveLiveRegisters(codegen, locations);
237 
238     InvokeRuntimeCallingConvention calling_convention;
239     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
240     x86_64_codegen->InvokeRuntime(do_clinit_ ?
241                                       QUICK_ENTRY_POINT(pInitializeStaticStorage) :
242                                       QUICK_ENTRY_POINT(pInitializeType),
243                                   at_,
244                                   dex_pc_,
245                                   this);
246     if (do_clinit_) {
247       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
248     } else {
249       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
250     }
251 
252     Location out = locations->Out();
253     // Move the class to the desired location.
254     if (out.IsValid()) {
255       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
256       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
257     }
258 
259     RestoreLiveRegisters(codegen, locations);
260     __ jmp(GetExitLabel());
261   }
262 
263   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
264 
265  private:
266   // The class this slow path will load.
267   HLoadClass* const cls_;
268 
269   // The instruction where this slow path is happening.
270   // (Might be the load class or an initialization check).
271   HInstruction* const at_;
272 
273   // The dex PC of `at_`.
274   const uint32_t dex_pc_;
275 
276   // Whether to initialize the class.
277   const bool do_clinit_;
278 
279   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
280 };
281 
282 class LoadStringSlowPathX86_64 : public SlowPathCode {
283  public:
284   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
285 
286   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
287     LocationSummary* locations = instruction_->GetLocations();
288     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
289 
290     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
291     __ Bind(GetEntryLabel());
292     SaveLiveRegisters(codegen, locations);
293 
294     InvokeRuntimeCallingConvention calling_convention;
295     const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
296     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
297     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
298                                   instruction_,
299                                   instruction_->GetDexPc(),
300                                   this);
301     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
302     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
303     RestoreLiveRegisters(codegen, locations);
304     __ jmp(GetExitLabel());
305   }
306 
307   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
308 
309  private:
310   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
311 };
312 
313 class TypeCheckSlowPathX86_64 : public SlowPathCode {
314  public:
315   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
316       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
317 
318   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
319     LocationSummary* locations = instruction_->GetLocations();
320     Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
321                                                         : locations->Out();
322     uint32_t dex_pc = instruction_->GetDexPc();
323     DCHECK(instruction_->IsCheckCast()
324            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
325 
326     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
327     __ Bind(GetEntryLabel());
328 
329     if (!is_fatal_) {
330       SaveLiveRegisters(codegen, locations);
331     }
332 
333     // We're moving two locations to locations that could overlap, so we need a parallel
334     // move resolver.
335     InvokeRuntimeCallingConvention calling_convention;
336     codegen->EmitParallelMoves(
337         locations->InAt(1),
338         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
339         Primitive::kPrimNot,
340         object_class,
341         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
342         Primitive::kPrimNot);
343 
344     if (instruction_->IsInstanceOf()) {
345       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
346                                     instruction_,
347                                     dex_pc,
348                                     this);
349       CheckEntrypointTypes<
350           kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
351     } else {
352       DCHECK(instruction_->IsCheckCast());
353       x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
354                                     instruction_,
355                                     dex_pc,
356                                     this);
357       CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
358     }
359 
360     if (!is_fatal_) {
361       if (instruction_->IsInstanceOf()) {
362         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
363       }
364 
365       RestoreLiveRegisters(codegen, locations);
366       __ jmp(GetExitLabel());
367     }
368   }
369 
370   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
371 
372   bool IsFatal() const OVERRIDE { return is_fatal_; }
373 
374  private:
375   const bool is_fatal_;
376 
377   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
378 };
379 
380 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
381  public:
382   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
383       : SlowPathCode(instruction) {}
384 
385   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
386     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
387     __ Bind(GetEntryLabel());
388     SaveLiveRegisters(codegen, instruction_->GetLocations());
389     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
390                                   instruction_,
391                                   instruction_->GetDexPc(),
392                                   this);
393     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
394   }
395 
396   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
397 
398  private:
399   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
400 };
401 
402 class ArraySetSlowPathX86_64 : public SlowPathCode {
403  public:
404   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
405 
406   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407     LocationSummary* locations = instruction_->GetLocations();
408     __ Bind(GetEntryLabel());
409     SaveLiveRegisters(codegen, locations);
410 
411     InvokeRuntimeCallingConvention calling_convention;
412     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
413     parallel_move.AddMove(
414         locations->InAt(0),
415         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
416         Primitive::kPrimNot,
417         nullptr);
418     parallel_move.AddMove(
419         locations->InAt(1),
420         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
421         Primitive::kPrimInt,
422         nullptr);
423     parallel_move.AddMove(
424         locations->InAt(2),
425         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
426         Primitive::kPrimNot,
427         nullptr);
428     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
429 
430     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
431     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
432                                   instruction_,
433                                   instruction_->GetDexPc(),
434                                   this);
435     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
436     RestoreLiveRegisters(codegen, locations);
437     __ jmp(GetExitLabel());
438   }
439 
440   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
441 
442  private:
443   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
444 };
445 
446 // Slow path marking an object during a read barrier.
447 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
448  public:
449   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
450       : SlowPathCode(instruction), out_(out), obj_(obj) {
451     DCHECK(kEmitCompilerReadBarrier);
452   }
453 
454   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
455 
456   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
457     LocationSummary* locations = instruction_->GetLocations();
458     Register reg_out = out_.AsRegister<Register>();
459     DCHECK(locations->CanCall());
460     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
461     DCHECK(instruction_->IsInstanceFieldGet() ||
462            instruction_->IsStaticFieldGet() ||
463            instruction_->IsArrayGet() ||
464            instruction_->IsLoadClass() ||
465            instruction_->IsLoadString() ||
466            instruction_->IsInstanceOf() ||
467            instruction_->IsCheckCast())
468         << "Unexpected instruction in read barrier marking slow path: "
469         << instruction_->DebugName();
470 
471     __ Bind(GetEntryLabel());
472     SaveLiveRegisters(codegen, locations);
473 
474     InvokeRuntimeCallingConvention calling_convention;
475     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
476     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
477     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
478                                instruction_,
479                                instruction_->GetDexPc(),
480                                this);
481     CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
482     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
483 
484     RestoreLiveRegisters(codegen, locations);
485     __ jmp(GetExitLabel());
486   }
487 
488  private:
489   const Location out_;
490   const Location obj_;
491 
492   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
493 };
494 
495 // Slow path generating a read barrier for a heap reference.
496 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
497  public:
498   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
499                                             Location out,
500                                             Location ref,
501                                             Location obj,
502                                             uint32_t offset,
503                                             Location index)
504       : SlowPathCode(instruction),
505         out_(out),
506         ref_(ref),
507         obj_(obj),
508         offset_(offset),
509         index_(index) {
510     DCHECK(kEmitCompilerReadBarrier);
511     // If `obj` is equal to `out` or `ref`, it means the initial
512     // object has been overwritten by (or after) the heap object
513     // reference load to be instrumented, e.g.:
514     //
515     //   __ movl(out, Address(out, offset));
516     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
517     //
518     // In that case, we have lost the information about the original
519     // object, and the emitted read barrier cannot work properly.
520     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
521     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
522   }
523 
524   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
525     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
526     LocationSummary* locations = instruction_->GetLocations();
527     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
528     DCHECK(locations->CanCall());
529     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
530     DCHECK(!instruction_->IsInvoke() ||
531            (instruction_->IsInvokeStaticOrDirect() &&
532             instruction_->GetLocations()->Intrinsified()))
533         << "Unexpected instruction in read barrier for heap reference slow path: "
534         << instruction_->DebugName();
535 
536     __ Bind(GetEntryLabel());
537     SaveLiveRegisters(codegen, locations);
538 
539     // We may have to change the index's value, but as `index_` is a
540     // constant member (like other "inputs" of this slow path),
541     // introduce a copy of it, `index`.
542     Location index = index_;
543     if (index_.IsValid()) {
544       // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
545       if (instruction_->IsArrayGet()) {
546         // Compute real offset and store it in index_.
547         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
548         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
549         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
550           // We are about to change the value of `index_reg` (see the
551           // calls to art::x86_64::X86_64Assembler::shll and
552           // art::x86_64::X86_64Assembler::AddImmediate below), but it
553           // has not been saved by the previous call to
554           // art::SlowPathCode::SaveLiveRegisters, as it is a
555           // callee-save register --
556           // art::SlowPathCode::SaveLiveRegisters does not consider
557           // callee-save registers, as it has been designed with the
558           // assumption that callee-save registers are supposed to be
559           // handled by the called function.  So, as a callee-save
560           // register, `index_reg` _would_ eventually be saved onto
561           // the stack, but it would be too late: we would have
562           // changed its value earlier.  Therefore, we manually save
563           // it here into another freely available register,
564           // `free_reg`, chosen of course among the caller-save
565           // registers (as a callee-save `free_reg` register would
566           // exhibit the same problem).
567           //
568           // Note we could have requested a temporary register from
569           // the register allocator instead; but we prefer not to, as
570           // this is a slow path, and we know we can find a
571           // caller-save register that is available.
572           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
573           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
574           index_reg = free_reg;
575           index = Location::RegisterLocation(index_reg);
576         } else {
577           // The initial register stored in `index_` has already been
578           // saved in the call to art::SlowPathCode::SaveLiveRegisters
579           // (as it is not a callee-save register), so we can freely
580           // use it.
581         }
582         // Shifting the index value contained in `index_reg` by the
583         // scale factor (2) cannot overflow in practice, as the
584         // runtime is unable to allocate object arrays with a size
585         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
586         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
587         static_assert(
588             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
589             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
590         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
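        // At this point `index_reg` effectively holds
        // offset_ + index * sizeof(mirror::HeapReference<mirror::Object>)
        // (TIMES_4 is a shift by two, matching the static_assert above), i.e.
        // the byte offset of the element within the array object; this is the
        // value passed as the `offset` argument of the entrypoint below.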
591       } else {
592         DCHECK(instruction_->IsInvoke());
593         DCHECK(instruction_->GetLocations()->Intrinsified());
594         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
595                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
596             << instruction_->AsInvoke()->GetIntrinsic();
597         DCHECK_EQ(offset_, 0U);
598         DCHECK(index_.IsRegister());
599       }
600     }
601 
602     // We're moving two or three locations to locations that could
603     // overlap, so we need a parallel move resolver.
604     InvokeRuntimeCallingConvention calling_convention;
605     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
606     parallel_move.AddMove(ref_,
607                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
608                           Primitive::kPrimNot,
609                           nullptr);
610     parallel_move.AddMove(obj_,
611                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
612                           Primitive::kPrimNot,
613                           nullptr);
614     if (index.IsValid()) {
615       parallel_move.AddMove(index,
616                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
617                             Primitive::kPrimInt,
618                             nullptr);
619       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
620     } else {
621       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
622       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
623     }
624     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
625                                   instruction_,
626                                   instruction_->GetDexPc(),
627                                   this);
628     CheckEntrypointTypes<
629         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
630     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
631 
632     RestoreLiveRegisters(codegen, locations);
633     __ jmp(GetExitLabel());
634   }
635 
636   const char* GetDescription() const OVERRIDE {
637     return "ReadBarrierForHeapReferenceSlowPathX86_64";
638   }
639 
640  private:
641   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
642     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
643     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
644     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
645       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
646         return static_cast<CpuRegister>(i);
647       }
648     }
649     // We shall never fail to find a free caller-save register, as
650     // there are more than two core caller-save registers on x86-64
651     // (meaning it is possible to find one which is different from
652     // `ref` and `obj`).
653     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
654     LOG(FATAL) << "Could not find a free caller-save register";
655     UNREACHABLE();
656   }
657 
658   const Location out_;
659   const Location ref_;
660   const Location obj_;
661   const uint32_t offset_;
662   // An additional location containing an index to an array.
663   // Only used for HArrayGet and the UnsafeGetObject &
664   // UnsafeGetObjectVolatile intrinsics.
665   const Location index_;
666 
667   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
668 };
669 
670 // Slow path generating a read barrier for a GC root.
671 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
672  public:
673   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
674       : SlowPathCode(instruction), out_(out), root_(root) {
675     DCHECK(kEmitCompilerReadBarrier);
676   }
677 
678   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
679     LocationSummary* locations = instruction_->GetLocations();
680     DCHECK(locations->CanCall());
681     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
682     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
683         << "Unexpected instruction in read barrier for GC root slow path: "
684         << instruction_->DebugName();
685 
686     __ Bind(GetEntryLabel());
687     SaveLiveRegisters(codegen, locations);
688 
689     InvokeRuntimeCallingConvention calling_convention;
690     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
691     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
692     x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
693                                   instruction_,
694                                   instruction_->GetDexPc(),
695                                   this);
696     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
697     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
698 
699     RestoreLiveRegisters(codegen, locations);
700     __ jmp(GetExitLabel());
701   }
702 
703   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
704 
705  private:
706   const Location out_;
707   const Location root_;
708 
709   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
710 };
711 
712 #undef __
713 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
714 
715 inline Condition X86_64IntegerCondition(IfCondition cond) {
716   switch (cond) {
717     case kCondEQ: return kEqual;
718     case kCondNE: return kNotEqual;
719     case kCondLT: return kLess;
720     case kCondLE: return kLessEqual;
721     case kCondGT: return kGreater;
722     case kCondGE: return kGreaterEqual;
723     case kCondB:  return kBelow;
724     case kCondBE: return kBelowEqual;
725     case kCondA:  return kAbove;
726     case kCondAE: return kAboveEqual;
727   }
728   LOG(FATAL) << "Unreachable";
729   UNREACHABLE();
730 }
731 
732 // Maps FP condition to x86_64 name.
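// (The FP compares these conditions are used with, ucomiss/ucomisd, only set
// ZF, CF and PF, so the unsigned-style condition codes below/above are used
// here rather than the signed less/greater codes used for integers above.)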
733 inline Condition X86_64FPCondition(IfCondition cond) {
734   switch (cond) {
735     case kCondEQ: return kEqual;
736     case kCondNE: return kNotEqual;
737     case kCondLT: return kBelow;
738     case kCondLE: return kBelowEqual;
739     case kCondGT: return kAbove;
740     case kCondGE: return kAboveEqual;
741     default:      break;  // should not happen
742   };
743   LOG(FATAL) << "Unreachable";
744   UNREACHABLE();
745 }
746 
747 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
748       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
749       MethodReference target_method ATTRIBUTE_UNUSED) {
750   switch (desired_dispatch_info.code_ptr_location) {
751     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
752     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
753       // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
754       return HInvokeStaticOrDirect::DispatchInfo {
755         desired_dispatch_info.method_load_kind,
756         HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
757         desired_dispatch_info.method_load_data,
758         0u
759       };
760     default:
761       return desired_dispatch_info;
762   }
763 }
764 
765 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
766                                                      Location temp) {
767   // All registers are assumed to be correctly set up.
768 
769   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
770   switch (invoke->GetMethodLoadKind()) {
771     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
772       // temp = thread->string_init_entrypoint
773       __ gs()->movq(temp.AsRegister<CpuRegister>(),
774                     Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
775       break;
776     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
777       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
778       break;
779     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
780       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
781       break;
782     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
783       __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
784       method_patches_.emplace_back(invoke->GetTargetMethod());
785       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
786       break;
787     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
788       __ movq(temp.AsRegister<CpuRegister>(),
789               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
790       // Bind a new fixup label at the end of the "movl" insn.
791       uint32_t offset = invoke->GetDexCacheArrayOffset();
792       __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
793       break;
794     }
795     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
796       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
797       Register method_reg;
798       CpuRegister reg = temp.AsRegister<CpuRegister>();
799       if (current_method.IsRegister()) {
800         method_reg = current_method.AsRegister<Register>();
801       } else {
802         DCHECK(invoke->GetLocations()->Intrinsified());
803         DCHECK(!current_method.IsValid());
804         method_reg = reg.AsRegister();
805         __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
806       }
807       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
808       __ movq(reg,
809               Address(CpuRegister(method_reg),
810                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
811       // temp = temp[index_in_cache];
812       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
813       uint32_t index_in_cache = invoke->GetDexMethodIndex();
814       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
815       break;
816     }
817   }
818 
819   switch (invoke->GetCodePtrLocation()) {
820     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
821       __ call(&frame_entry_label_);
822       break;
823     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
824       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
825       Label* label = &relative_call_patches_.back().label;
826       __ call(label);  // Bind to the patch label, override at link time.
827       __ Bind(label);  // Bind the label at the end of the "call" insn.
828       break;
829     }
830     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
831     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
832       // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
833       LOG(FATAL) << "Unsupported";
834       UNREACHABLE();
835     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
836       // (callee_method + offset_of_quick_compiled_code)()
837       __ call(Address(callee_method.AsRegister<CpuRegister>(),
838                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
839                           kX86_64WordSize).SizeValue()));
840       break;
841   }
842 
843   DCHECK(!IsLeafMethod());
844 }
845 
846 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
847   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
848   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
849       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
850 
851   // Use the calling convention instead of the location of the receiver, as
852   // intrinsics may have put the receiver in a different register. In the intrinsics
853   // slow path, the arguments have been moved to the right place, so here we are
854   // guaranteed that the receiver is the first register of the calling convention.
855   InvokeDexCallingConvention calling_convention;
856   Register receiver = calling_convention.GetRegisterAt(0);
857 
858   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
859   // /* HeapReference<Class> */ temp = receiver->klass_
860   __ movl(temp, Address(CpuRegister(receiver), class_offset));
861   MaybeRecordImplicitNullCheck(invoke);
862   // Instead of simply (possibly) unpoisoning `temp` here, we should
863   // emit a read barrier for the previous class reference load.
864   // However this is not required in practice, as this is an
865   // intermediate/temporary reference and because the current
866   // concurrent copying collector keeps the from-space memory
867 // intact/accessible until the end of the marking phase (future
868 // concurrent copying collectors may not keep it accessible).
869   __ MaybeUnpoisonHeapReference(temp);
870   // temp = temp->GetMethodAt(method_offset);
871   __ movq(temp, Address(temp, method_offset));
872   // call temp->GetEntryPoint();
873   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
874       kX86_64WordSize).SizeValue()));
875 }
876 
877 void CodeGeneratorX86_64::RecordSimplePatch() {
878   if (GetCompilerOptions().GetIncludePatchInformation()) {
879     simple_patches_.emplace_back();
880     __ Bind(&simple_patches_.back());
881   }
882 }
883 
884 void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
885   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
886   __ Bind(&string_patches_.back().label);
887 }
888 
889 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
890                                                             uint32_t element_offset) {
891   // Add a patch entry and return the label.
892   pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
893   return &pc_relative_dex_cache_patches_.back().label;
894 }
895 
896 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
897   DCHECK(linker_patches->empty());
898   size_t size =
899       method_patches_.size() +
900       relative_call_patches_.size() +
901       pc_relative_dex_cache_patches_.size() +
902       simple_patches_.size() +
903       string_patches_.size();
904   linker_patches->reserve(size);
905   // The label points to the end of the "movl" insn but the literal offset for method
906   // patch needs to point to the embedded constant which occupies the last 4 bytes.
907   constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
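  // (For instance, the `movl reg, imm32` bound to these labels ends with its
  // 4-byte immediate, so label.Position() - 4 is the offset of the first byte
  // of the embedded constant that the linker rewrites.)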
908   for (const MethodPatchInfo<Label>& info : method_patches_) {
909     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
910     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
911                                                        info.target_method.dex_file,
912                                                        info.target_method.dex_method_index));
913   }
914   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
915     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
916     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
917                                                              info.target_method.dex_file,
918                                                              info.target_method.dex_method_index));
919   }
920   for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
921     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
922     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
923                                                               &info.target_dex_file,
924                                                               info.label.Position(),
925                                                               info.element_offset));
926   }
927   for (const Label& label : simple_patches_) {
928     uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
929     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
930   }
931   for (const StringPatchInfo<Label>& info : string_patches_) {
932     // These are always PC-relative, see GetSupportedLoadStringKind().
933     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
934     linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
935                                                                &info.dex_file,
936                                                                info.label.Position(),
937                                                                info.string_index));
938   }
939 }
940 
941 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
942   stream << Register(reg);
943 }
944 
945 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
946   stream << FloatRegister(reg);
947 }
948 
949 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
950   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
951   return kX86_64WordSize;
952 }
953 
954 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
955   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
956   return kX86_64WordSize;
957 }
958 
959 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
960   __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
961   return kX86_64WordSize;
962 }
963 
964 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
965   __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
966   return kX86_64WordSize;
967 }
968 
969 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
970                                         HInstruction* instruction,
971                                         uint32_t dex_pc,
972                                         SlowPathCode* slow_path) {
973   InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
974                 instruction,
975                 dex_pc,
976                 slow_path);
977 }
978 
979 void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
980                                         HInstruction* instruction,
981                                         uint32_t dex_pc,
982                                         SlowPathCode* slow_path) {
983   ValidateInvokeRuntime(instruction, slow_path);
984   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
985   RecordPcInfo(instruction, dex_pc, slow_path);
986 }
987 
988 static constexpr int kNumberOfCpuRegisterPairs = 0;
989 // Use a fake return address register to mimic Quick.
990 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
991 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
992                                          const X86_64InstructionSetFeatures& isa_features,
993                                          const CompilerOptions& compiler_options,
994                                          OptimizingCompilerStats* stats)
995       : CodeGenerator(graph,
996                       kNumberOfCpuRegisters,
997                       kNumberOfFloatRegisters,
998                       kNumberOfCpuRegisterPairs,
999                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1000                                           arraysize(kCoreCalleeSaves))
1001                           | (1 << kFakeReturnRegister),
1002                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1003                                           arraysize(kFpuCalleeSaves)),
1004                       compiler_options,
1005                       stats),
1006         block_labels_(nullptr),
1007         location_builder_(graph, this),
1008         instruction_visitor_(graph, this),
1009         move_resolver_(graph->GetArena(), this),
1010         assembler_(graph->GetArena()),
1011         isa_features_(isa_features),
1012         constant_area_start_(0),
1013         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1014         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1015         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1016         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1017         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1018         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1019   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1020 }
1021 
1022 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1023                                                                CodeGeneratorX86_64* codegen)
1024       : InstructionCodeGenerator(graph, codegen),
1025         assembler_(codegen->GetAssembler()),
1026         codegen_(codegen) {}
1027 
1028 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1029   // Stack register is always reserved.
1030   blocked_core_registers_[RSP] = true;
1031 
1032   // Block the register used as TMP.
1033   blocked_core_registers_[TMP] = true;
1034 }
1035 
1036 static dwarf::Reg DWARFReg(Register reg) {
1037   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1038 }
1039 
1040 static dwarf::Reg DWARFReg(FloatRegister reg) {
1041   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1042 }
1043 
1044 void CodeGeneratorX86_64::GenerateFrameEntry() {
1045   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1046   __ Bind(&frame_entry_label_);
1047   bool skip_overflow_check = IsLeafMethod()
1048       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1049   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1050 
1051   if (!skip_overflow_check) {
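    // A note on the implicit check below: the testq only reads the word at
    // RSP - GetStackOverflowReservedBytes(kX86_64); RAX is just a dummy
    // operand of the flag-setting AND. If the new frame would overflow the
    // stack, that address falls in the protected guard region, the read
    // faults, and the runtime's fault handler (assumed here, in line with the
    // DCHECK on implicit checks above) turns the fault into a
    // StackOverflowError.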
1052     __ testq(CpuRegister(RAX), Address(
1053         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1054     RecordPcInfo(nullptr, 0);
1055   }
1056 
1057   if (HasEmptyFrame()) {
1058     return;
1059   }
1060 
1061   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1062     Register reg = kCoreCalleeSaves[i];
1063     if (allocated_registers_.ContainsCoreRegister(reg)) {
1064       __ pushq(CpuRegister(reg));
1065       __ cfi().AdjustCFAOffset(kX86_64WordSize);
1066       __ cfi().RelOffset(DWARFReg(reg), 0);
1067     }
1068   }
1069 
1070   int adjust = GetFrameSize() - GetCoreSpillSize();
1071   __ subq(CpuRegister(RSP), Immediate(adjust));
1072   __ cfi().AdjustCFAOffset(adjust);
1073   uint32_t xmm_spill_location = GetFpuSpillStart();
1074   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1075 
1076   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1077     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1078       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1079       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1080       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1081     }
1082   }
1083 
1084   __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1085           CpuRegister(kMethodRegisterArgument));
1086 }
1087 
1088 void CodeGeneratorX86_64::GenerateFrameExit() {
1089   __ cfi().RememberState();
1090   if (!HasEmptyFrame()) {
1091     uint32_t xmm_spill_location = GetFpuSpillStart();
1092     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1093     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1094       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1095         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1096         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1097         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1098       }
1099     }
1100 
1101     int adjust = GetFrameSize() - GetCoreSpillSize();
1102     __ addq(CpuRegister(RSP), Immediate(adjust));
1103     __ cfi().AdjustCFAOffset(-adjust);
1104 
1105     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1106       Register reg = kCoreCalleeSaves[i];
1107       if (allocated_registers_.ContainsCoreRegister(reg)) {
1108         __ popq(CpuRegister(reg));
1109         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1110         __ cfi().Restore(DWARFReg(reg));
1111       }
1112     }
1113   }
1114   __ ret();
1115   __ cfi().RestoreState();
1116   __ cfi().DefCFAOffset(GetFrameSize());
1117 }
1118 
1119 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1120   __ Bind(GetLabelOf(block));
1121 }
1122 
1123 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1124   if (source.Equals(destination)) {
1125     return;
1126   }
1127   if (destination.IsRegister()) {
1128     CpuRegister dest = destination.AsRegister<CpuRegister>();
1129     if (source.IsRegister()) {
1130       __ movq(dest, source.AsRegister<CpuRegister>());
1131     } else if (source.IsFpuRegister()) {
1132       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1133     } else if (source.IsStackSlot()) {
1134       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1135     } else if (source.IsConstant()) {
1136       HConstant* constant = source.GetConstant();
1137       if (constant->IsLongConstant()) {
1138         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1139       } else {
1140         Load32BitValue(dest, GetInt32ValueOf(constant));
1141       }
1142     } else {
1143       DCHECK(source.IsDoubleStackSlot());
1144       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1145     }
1146   } else if (destination.IsFpuRegister()) {
1147     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1148     if (source.IsRegister()) {
1149       __ movd(dest, source.AsRegister<CpuRegister>());
1150     } else if (source.IsFpuRegister()) {
1151       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1152     } else if (source.IsConstant()) {
1153       HConstant* constant = source.GetConstant();
1154       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1155       if (constant->IsFloatConstant()) {
1156         Load32BitValue(dest, static_cast<int32_t>(value));
1157       } else {
1158         Load64BitValue(dest, value);
1159       }
1160     } else if (source.IsStackSlot()) {
1161       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1162     } else {
1163       DCHECK(source.IsDoubleStackSlot());
1164       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1165     }
1166   } else if (destination.IsStackSlot()) {
1167     if (source.IsRegister()) {
1168       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1169               source.AsRegister<CpuRegister>());
1170     } else if (source.IsFpuRegister()) {
1171       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1172                source.AsFpuRegister<XmmRegister>());
1173     } else if (source.IsConstant()) {
1174       HConstant* constant = source.GetConstant();
1175       int32_t value = GetInt32ValueOf(constant);
1176       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1177     } else {
1178       DCHECK(source.IsStackSlot()) << source;
1179       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1180       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1181     }
1182   } else {
1183     DCHECK(destination.IsDoubleStackSlot());
1184     if (source.IsRegister()) {
1185       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1186               source.AsRegister<CpuRegister>());
1187     } else if (source.IsFpuRegister()) {
1188       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1189                source.AsFpuRegister<XmmRegister>());
1190     } else if (source.IsConstant()) {
1191       HConstant* constant = source.GetConstant();
1192       int64_t value;
1193       if (constant->IsDoubleConstant()) {
1194         value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
1195       } else {
1196         DCHECK(constant->IsLongConstant());
1197         value = constant->AsLongConstant()->GetValue();
1198       }
1199       Store64BitValueToStack(destination, value);
1200     } else {
1201       DCHECK(source.IsDoubleStackSlot());
1202       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1203       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1204     }
1205   }
1206 }
1207 
1208 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1209   DCHECK(location.IsRegister());
1210   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1211 }
1212 
1213 void CodeGeneratorX86_64::MoveLocation(
1214     Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1215   Move(dst, src);
1216 }
1217 
1218 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1219   if (location.IsRegister()) {
1220     locations->AddTemp(location);
1221   } else {
1222     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1223   }
1224 }
1225 
1226 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1227   DCHECK(!successor->IsExitBlock());
1228 
1229   HBasicBlock* block = got->GetBlock();
1230   HInstruction* previous = got->GetPrevious();
1231 
1232   HLoopInformation* info = block->GetLoopInformation();
1233   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1234     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1235     return;
1236   }
1237 
1238   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1239     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1240   }
1241   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1242     __ jmp(codegen_->GetLabelOf(successor));
1243   }
1244 }
1245 
1246 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247   got->SetLocations(nullptr);
1248 }
1249 
1250 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251   HandleGoto(got, got->GetSuccessor());
1252 }
1253 
1254 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255   try_boundary->SetLocations(nullptr);
1256 }
1257 
1258 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260   if (!successor->IsExitBlock()) {
1261     HandleGoto(try_boundary, successor);
1262   }
1263 }
1264 
1265 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266   exit->SetLocations(nullptr);
1267 }
1268 
1269 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270 }
1271 
1272 template<class LabelType>
1273 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274                                                      LabelType* true_label,
1275                                                      LabelType* false_label) {
1276   if (cond->IsFPConditionTrueIfNaN()) {
1277     __ j(kUnordered, true_label);
1278   } else if (cond->IsFPConditionFalseIfNaN()) {
1279     __ j(kUnordered, false_label);
1280   }
1281   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282 }
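// Illustrative shape of the emitted code (register names assumed) for an FP `==`,
// where NaN must make the condition false; the caller emits the ucomis{s,d} and
// GenerateFPJumps then emits the jumps:
//     ucomiss %xmm1, %xmm0
//     jp   false_label   // unordered (either operand is NaN) -> condition is false
//     je   true_label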
1283 
1284 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285   LocationSummary* locations = condition->GetLocations();
1286 
1287   Location left = locations->InAt(0);
1288   Location right = locations->InAt(1);
1289   Primitive::Type type = condition->InputAt(0)->GetType();
1290   switch (type) {
1291     case Primitive::kPrimBoolean:
1292     case Primitive::kPrimByte:
1293     case Primitive::kPrimChar:
1294     case Primitive::kPrimShort:
1295     case Primitive::kPrimInt:
1296     case Primitive::kPrimNot: {
1297       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298       if (right.IsConstant()) {
1299         int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300         if (value == 0) {
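          // testl sets the same flags as cmpl with an immediate 0 and encodes shorter.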
1301           __ testl(left_reg, left_reg);
1302         } else {
1303           __ cmpl(left_reg, Immediate(value));
1304         }
1305       } else if (right.IsStackSlot()) {
1306         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307       } else {
1308         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309       }
1310       break;
1311     }
1312     case Primitive::kPrimLong: {
1313       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314       if (right.IsConstant()) {
1315         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316         codegen_->Compare64BitValue(left_reg, value);
1317       } else if (right.IsDoubleStackSlot()) {
1318         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319       } else {
1320         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321       }
1322       break;
1323     }
1324     case Primitive::kPrimFloat: {
1325       if (right.IsFpuRegister()) {
1326         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327       } else if (right.IsConstant()) {
1328         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329                    codegen_->LiteralFloatAddress(
1330                      right.GetConstant()->AsFloatConstant()->GetValue()));
1331       } else {
1332         DCHECK(right.IsStackSlot());
1333         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334                    Address(CpuRegister(RSP), right.GetStackIndex()));
1335       }
1336       break;
1337     }
1338     case Primitive::kPrimDouble: {
1339       if (right.IsFpuRegister()) {
1340         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341       } else if (right.IsConstant()) {
1342         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343                    codegen_->LiteralDoubleAddress(
1344                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1345       } else {
1346         DCHECK(right.IsDoubleStackSlot());
1347         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348                    Address(CpuRegister(RSP), right.GetStackIndex()));
1349       }
1350       break;
1351     }
1352     default:
1353       LOG(FATAL) << "Unexpected condition type " << type;
1354   }
1355 }
1356 
1357 template<class LabelType>
1358 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359                                                                   LabelType* true_target_in,
1360                                                                   LabelType* false_target_in) {
1361   // Generated branching requires both targets to be explicit. If either of the
1362   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1363   LabelType fallthrough_target;
1364   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366 
1367   // Generate the comparison to set the CC.
1368   GenerateCompareTest(condition);
1369 
1370   // Now generate the correct jump(s).
1371   Primitive::Type type = condition->InputAt(0)->GetType();
1372   switch (type) {
1373     case Primitive::kPrimLong: {
1374       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375       break;
1376     }
1377     case Primitive::kPrimFloat: {
1378       GenerateFPJumps(condition, true_target, false_target);
1379       break;
1380     }
1381     case Primitive::kPrimDouble: {
1382       GenerateFPJumps(condition, true_target, false_target);
1383       break;
1384     }
1385     default:
1386       LOG(FATAL) << "Unexpected condition type " << type;
1387   }
1388 
1389   if (false_target != &fallthrough_target) {
1390     __ jmp(false_target);
1391   }
1392 
1393   if (fallthrough_target.IsLinked()) {
1394     __ Bind(&fallthrough_target);
1395   }
1396 }
1397 
1398 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1400   // are set only strictly before `branch`. We can't use the eflags on long
1401   // conditions if they are materialized due to the complex branching.
1402   return cond->IsCondition() &&
1403          cond->GetNext() == branch &&
1404          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405 }
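// Illustrative example (assumed HIR shape): for `c = a < b` immediately followed
// by `if (c)`, the materialization sequence
//     xorl %eax, %eax
//     cmpl %esi, %edi
//     setl %al
// leaves the flags of cmpl intact (setcc does not modify EFLAGS), so the branch
// can simply be `jl true_target` instead of re-testing %eax against zero.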
1406 
1407 template<class LabelType>
1408 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409                                                            size_t condition_input_index,
1410                                                            LabelType* true_target,
1411                                                            LabelType* false_target) {
1412   HInstruction* cond = instruction->InputAt(condition_input_index);
1413 
1414   if (true_target == nullptr && false_target == nullptr) {
1415     // Nothing to do. The code always falls through.
1416     return;
1417   } else if (cond->IsIntConstant()) {
1418     // Constant condition, statically compared against "true" (integer value 1).
1419     if (cond->AsIntConstant()->IsTrue()) {
1420       if (true_target != nullptr) {
1421         __ jmp(true_target);
1422       }
1423     } else {
1424       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425       if (false_target != nullptr) {
1426         __ jmp(false_target);
1427       }
1428     }
1429     return;
1430   }
1431 
1432   // The following code generates these patterns:
1433   //  (1) true_target == nullptr && false_target != nullptr
1434   //        - opposite condition true => branch to false_target
1435   //  (2) true_target != nullptr && false_target == nullptr
1436   //        - condition true => branch to true_target
1437   //  (3) true_target != nullptr && false_target != nullptr
1438   //        - condition true => branch to true_target
1439   //        - branch to false_target
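  // For example (illustrative), for `if (cond)` whose true successor is the next
  // block, case (1) applies: a single jump on the opposite condition to
  // false_target is emitted and the true case falls through.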
1440   if (IsBooleanValueOrMaterializedCondition(cond)) {
1441     if (AreEflagsSetFrom(cond, instruction)) {
1442       if (true_target == nullptr) {
1443         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444       } else {
1445         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446       }
1447     } else {
1448       // Materialized condition, compare against 0.
1449       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450       if (lhs.IsRegister()) {
1451         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452       } else {
1453         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454       }
1455       if (true_target == nullptr) {
1456         __ j(kEqual, false_target);
1457       } else {
1458         __ j(kNotEqual, true_target);
1459       }
1460     }
1461   } else {
1462     // Condition has not been materialized, use its inputs as the
1463     // comparison and its condition as the branch condition.
1464     HCondition* condition = cond->AsCondition();
1465 
1466     // If this is a long or FP comparison that has been folded into
1467     // the HCondition, generate the comparison directly.
1468     Primitive::Type type = condition->InputAt(0)->GetType();
1469     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470       GenerateCompareTestAndBranch(condition, true_target, false_target);
1471       return;
1472     }
1473 
1474     Location lhs = condition->GetLocations()->InAt(0);
1475     Location rhs = condition->GetLocations()->InAt(1);
1476     if (rhs.IsRegister()) {
1477       __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478     } else if (rhs.IsConstant()) {
1479       int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480       codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481     } else {
1482       __ cmpl(lhs.AsRegister<CpuRegister>(),
1483               Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484     }
1485     if (true_target == nullptr) {
1486       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487     } else {
1488       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489     }
1490   }
1491 
1492   // If neither branch falls through (case 3), the conditional branch to `true_target`
1493   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494   if (true_target != nullptr && false_target != nullptr) {
1495     __ jmp(false_target);
1496   }
1497 }
1498 
1499 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502     locations->SetInAt(0, Location::Any());
1503   }
1504 }
1505 
1506 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1509   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510       nullptr : codegen_->GetLabelOf(true_successor);
1511   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512       nullptr : codegen_->GetLabelOf(false_successor);
1513   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514 }
1515 
1516 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517   LocationSummary* locations = new (GetGraph()->GetArena())
1518       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520     locations->SetInAt(0, Location::Any());
1521   }
1522 }
1523 
1524 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526   GenerateTestAndBranch<Label>(deoptimize,
1527                                /* condition_input_index */ 0,
1528                                slow_path->GetEntryLabel(),
1529                                /* false_target */ nullptr);
1530 }
1531 
1532 static bool SelectCanUseCMOV(HSelect* select) {
1533   // There are no conditional move instructions for XMMs.
1534   if (Primitive::IsFloatingPointType(select->GetType())) {
1535     return false;
1536   }
1537 
1538   // An FP condition doesn't generate the single CC that we need.
1539   HInstruction* condition = select->GetCondition();
1540   if (condition->IsCondition() &&
1541       Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542     return false;
1543   }
1544 
1545   // We can generate a CMOV for this Select.
1546   return true;
1547 }
1548 
1549 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551   if (Primitive::IsFloatingPointType(select->GetType())) {
1552     locations->SetInAt(0, Location::RequiresFpuRegister());
1553     locations->SetInAt(1, Location::Any());
1554   } else {
1555     locations->SetInAt(0, Location::RequiresRegister());
1556     if (SelectCanUseCMOV(select)) {
1557       if (select->InputAt(1)->IsConstant()) {
1558         locations->SetInAt(1, Location::RequiresRegister());
1559       } else {
1560         locations->SetInAt(1, Location::Any());
1561       }
1562     } else {
1563       locations->SetInAt(1, Location::Any());
1564     }
1565   }
1566   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567     locations->SetInAt(2, Location::RequiresRegister());
1568   }
1569   locations->SetOut(Location::SameAsFirstInput());
1570 }
1571 
1572 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573   LocationSummary* locations = select->GetLocations();
1574   if (SelectCanUseCMOV(select)) {
1575     // If both the condition and the source types are integer, we can generate
1576     // a CMOV to implement Select.
1577     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578     Location value_true_loc = locations->InAt(1);
1579     DCHECK(locations->InAt(0).Equals(locations->Out()));
1580 
1581     HInstruction* select_condition = select->GetCondition();
1582     Condition cond = kNotEqual;
1583 
1584     // Figure out how to test the 'condition'.
1585     if (select_condition->IsCondition()) {
1586       HCondition* condition = select_condition->AsCondition();
1587       if (!condition->IsEmittedAtUseSite()) {
1588         // This was a previously materialized condition.
1589         // Can we use the existing condition code?
1590         if (AreEflagsSetFrom(condition, select)) {
1591           // Materialization was the previous instruction.  Condition codes are right.
1592           cond = X86_64IntegerCondition(condition->GetCondition());
1593         } else {
1594           // No, we have to recreate the condition code.
1595           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596           __ testl(cond_reg, cond_reg);
1597         }
1598       } else {
1599         GenerateCompareTest(condition);
1600         cond = X86_64IntegerCondition(condition->GetCondition());
1601       }
1602     } else {
1603       // Must be a boolean condition, which needs to be compared to 0.
1604       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605       __ testl(cond_reg, cond_reg);
1606     }
1607 
1608     // If the condition is true, overwrite the output, which already contains false.
1609     // Generate the correct sized CMOV.
1610     bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611     if (value_true_loc.IsRegister()) {
1612       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613     } else {
1614       __ cmov(cond,
1615               value_false,
1616               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617     }
1618   } else {
1619     NearLabel false_target;
1620     GenerateTestAndBranch<NearLabel>(select,
1621                                      /* condition_input_index */ 2,
1622                                      /* true_target */ nullptr,
1623                                      &false_target);
1624     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625     __ Bind(&false_target);
1626   }
1627 }
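// Illustrative sketch of the CMOV-based Select for a 32-bit value in registers
// (register names assumed):
//     testl  %ecx, %ecx        // or reuse the flags from a preceding condition
//     cmovne %esi, %eax        // out (pre-loaded with the false value) := true value
// For 64-bit types the 64-bit cmov form is emitted instead.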
1628 
1629 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630   new (GetGraph()->GetArena()) LocationSummary(info);
1631 }
1632 
1633 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635 }
1636 
1637 void CodeGeneratorX86_64::GenerateNop() {
1638   __ nop();
1639 }
1640 
1641 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642   LocationSummary* locations =
1643       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644   // Handle the long/FP comparisons made in instruction simplification.
1645   switch (cond->InputAt(0)->GetType()) {
1646     case Primitive::kPrimLong:
1647       locations->SetInAt(0, Location::RequiresRegister());
1648       locations->SetInAt(1, Location::Any());
1649       break;
1650     case Primitive::kPrimFloat:
1651     case Primitive::kPrimDouble:
1652       locations->SetInAt(0, Location::RequiresFpuRegister());
1653       locations->SetInAt(1, Location::Any());
1654       break;
1655     default:
1656       locations->SetInAt(0, Location::RequiresRegister());
1657       locations->SetInAt(1, Location::Any());
1658       break;
1659   }
1660   if (!cond->IsEmittedAtUseSite()) {
1661     locations->SetOut(Location::RequiresRegister());
1662   }
1663 }
1664 
1665 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666   if (cond->IsEmittedAtUseSite()) {
1667     return;
1668   }
1669 
1670   LocationSummary* locations = cond->GetLocations();
1671   Location lhs = locations->InAt(0);
1672   Location rhs = locations->InAt(1);
1673   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674   NearLabel true_label, false_label;
1675 
1676   switch (cond->InputAt(0)->GetType()) {
1677     default:
1678       // Integer case.
1679 
1680       // Clear output register: setcc only sets the low byte.
1681       __ xorl(reg, reg);
1682 
1683       if (rhs.IsRegister()) {
1684         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685       } else if (rhs.IsConstant()) {
1686         int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687         codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688       } else {
1689         __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690       }
1691       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692       return;
1693     case Primitive::kPrimLong:
1694       // Clear output register: setcc only sets the low byte.
1695       __ xorl(reg, reg);
1696 
1697       if (rhs.IsRegister()) {
1698         __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699       } else if (rhs.IsConstant()) {
1700         int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701         codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702       } else {
1703         __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704       }
1705       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706       return;
1707     case Primitive::kPrimFloat: {
1708       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709       if (rhs.IsConstant()) {
1710         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712       } else if (rhs.IsStackSlot()) {
1713         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714       } else {
1715         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716       }
1717       GenerateFPJumps(cond, &true_label, &false_label);
1718       break;
1719     }
1720     case Primitive::kPrimDouble: {
1721       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722       if (rhs.IsConstant()) {
1723         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725       } else if (rhs.IsDoubleStackSlot()) {
1726         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727       } else {
1728         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729       }
1730       GenerateFPJumps(cond, &true_label, &false_label);
1731       break;
1732     }
1733   }
1734 
1735   // Convert the jumps into the result.
1736   NearLabel done_label;
1737 
1738   // False case: result = 0.
1739   __ Bind(&false_label);
1740   __ xorl(reg, reg);
1741   __ jmp(&done_label);
1742 
1743   // True case: result = 1.
1744   __ Bind(&true_label);
1745   __ movl(reg, Immediate(1));
1746   __ Bind(&done_label);
1747 }
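// Illustrative materialization of an integer `x < y` (register names assumed):
//     xorl %eax, %eax          // clear first: setcc writes only the low byte
//     cmpl %esi, %edi
//     setl %al                 // eax = (x < y) ? 1 : 0
// FP conditions instead branch to the true/false labels and load 0/1 explicitly,
// since a NaN-aware result cannot be produced with a single setcc (see
// GenerateFPJumps above).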
1748 
1749 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750   HandleCondition(comp);
1751 }
1752 
1753 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754   HandleCondition(comp);
1755 }
1756 
1757 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758   HandleCondition(comp);
1759 }
1760 
1761 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762   HandleCondition(comp);
1763 }
1764 
1765 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766   HandleCondition(comp);
1767 }
1768 
1769 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770   HandleCondition(comp);
1771 }
1772 
1773 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774   HandleCondition(comp);
1775 }
1776 
1777 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778   HandleCondition(comp);
1779 }
1780 
1781 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782   HandleCondition(comp);
1783 }
1784 
1785 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786   HandleCondition(comp);
1787 }
1788 
1789 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790   HandleCondition(comp);
1791 }
1792 
1793 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794   HandleCondition(comp);
1795 }
1796 
1797 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798   HandleCondition(comp);
1799 }
1800 
1801 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802   HandleCondition(comp);
1803 }
1804 
1805 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806   HandleCondition(comp);
1807 }
1808 
1809 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810   HandleCondition(comp);
1811 }
1812 
1813 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814   HandleCondition(comp);
1815 }
1816 
1817 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818   HandleCondition(comp);
1819 }
1820 
1821 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822   HandleCondition(comp);
1823 }
1824 
1825 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826   HandleCondition(comp);
1827 }
1828 
1829 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830   LocationSummary* locations =
1831       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832   switch (compare->InputAt(0)->GetType()) {
1833     case Primitive::kPrimBoolean:
1834     case Primitive::kPrimByte:
1835     case Primitive::kPrimShort:
1836     case Primitive::kPrimChar:
1837     case Primitive::kPrimInt:
1838     case Primitive::kPrimLong: {
1839       locations->SetInAt(0, Location::RequiresRegister());
1840       locations->SetInAt(1, Location::Any());
1841       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842       break;
1843     }
1844     case Primitive::kPrimFloat:
1845     case Primitive::kPrimDouble: {
1846       locations->SetInAt(0, Location::RequiresFpuRegister());
1847       locations->SetInAt(1, Location::Any());
1848       locations->SetOut(Location::RequiresRegister());
1849       break;
1850     }
1851     default:
1852       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853   }
1854 }
1855 
1856 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857   LocationSummary* locations = compare->GetLocations();
1858   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859   Location left = locations->InAt(0);
1860   Location right = locations->InAt(1);
1861 
1862   NearLabel less, greater, done;
1863   Primitive::Type type = compare->InputAt(0)->GetType();
1864   Condition less_cond = kLess;
1865 
1866   switch (type) {
1867     case Primitive::kPrimBoolean:
1868     case Primitive::kPrimByte:
1869     case Primitive::kPrimShort:
1870     case Primitive::kPrimChar:
1871     case Primitive::kPrimInt: {
1872       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873       if (right.IsConstant()) {
1874         int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875         codegen_->Compare32BitValue(left_reg, value);
1876       } else if (right.IsStackSlot()) {
1877         __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878       } else {
1879         __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880       }
1881       break;
1882     }
1883     case Primitive::kPrimLong: {
1884       CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885       if (right.IsConstant()) {
1886         int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887         codegen_->Compare64BitValue(left_reg, value);
1888       } else if (right.IsDoubleStackSlot()) {
1889         __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890       } else {
1891         __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892       }
1893       break;
1894     }
1895     case Primitive::kPrimFloat: {
1896       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897       if (right.IsConstant()) {
1898         float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900       } else if (right.IsStackSlot()) {
1901         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902       } else {
1903         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904       }
1905       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906       less_cond = kBelow;  //  ucomis{s,d} sets CF
1907       break;
1908     }
1909     case Primitive::kPrimDouble: {
1910       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911       if (right.IsConstant()) {
1912         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914       } else if (right.IsDoubleStackSlot()) {
1915         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916       } else {
1917         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918       }
1919       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920       less_cond = kBelow;  //  ucomis{s,d} sets CF
1921       break;
1922     }
1923     default:
1924       LOG(FATAL) << "Unexpected compare type " << type;
1925   }
1926 
1927   __ movl(out, Immediate(0));
1928   __ j(kEqual, &done);
1929   __ j(less_cond, &less);
1930 
1931   __ Bind(&greater);
1932   __ movl(out, Immediate(1));
1933   __ jmp(&done);
1934 
1935   __ Bind(&less);
1936   __ movl(out, Immediate(-1));
1937 
1938   __ Bind(&done);
1939 }
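// Illustrative shape of the three-way compare (HCompare) tail, register names
// assumed; for FP inputs a `jp` to greater/less handles NaN before this point:
//     movl $0, %eax            // mov does not clobber the flags set by the compare
//     je   done                // equal -> 0
//     jl   less                // jb for FP, since ucomis{s,d} sets CF on "less"
//   greater:  movl $1, %eax
//             jmp  done
//   less:     movl $-1, %eax
//   done: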
1940 
1941 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942   LocationSummary* locations =
1943       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944   locations->SetOut(Location::ConstantLocation(constant));
1945 }
1946 
1947 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948   // Will be generated at use site.
1949 }
1950 
1951 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952   LocationSummary* locations =
1953       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954   locations->SetOut(Location::ConstantLocation(constant));
1955 }
1956 
1957 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958   // Will be generated at use site.
1959 }
1960 
1961 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962   LocationSummary* locations =
1963       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964   locations->SetOut(Location::ConstantLocation(constant));
1965 }
1966 
1967 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968   // Will be generated at use site.
1969 }
1970 
1971 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972   LocationSummary* locations =
1973       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974   locations->SetOut(Location::ConstantLocation(constant));
1975 }
1976 
1977 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978   // Will be generated at use site.
1979 }
1980 
1981 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982   LocationSummary* locations =
1983       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984   locations->SetOut(Location::ConstantLocation(constant));
1985 }
1986 
1987 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989   // Will be generated at use site.
1990 }
1991 
1992 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993   memory_barrier->SetLocations(nullptr);
1994 }
1995 
1996 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998 }
1999 
2000 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001   ret->SetLocations(nullptr);
2002 }
2003 
2004 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005   codegen_->GenerateFrameExit();
2006 }
2007 
2008 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009   LocationSummary* locations =
2010       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011   switch (ret->InputAt(0)->GetType()) {
2012     case Primitive::kPrimBoolean:
2013     case Primitive::kPrimByte:
2014     case Primitive::kPrimChar:
2015     case Primitive::kPrimShort:
2016     case Primitive::kPrimInt:
2017     case Primitive::kPrimNot:
2018     case Primitive::kPrimLong:
2019       locations->SetInAt(0, Location::RegisterLocation(RAX));
2020       break;
2021 
2022     case Primitive::kPrimFloat:
2023     case Primitive::kPrimDouble:
2024       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025       break;
2026 
2027     default:
2028       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029   }
2030 }
2031 
2032 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033   if (kIsDebugBuild) {
2034     switch (ret->InputAt(0)->GetType()) {
2035       case Primitive::kPrimBoolean:
2036       case Primitive::kPrimByte:
2037       case Primitive::kPrimChar:
2038       case Primitive::kPrimShort:
2039       case Primitive::kPrimInt:
2040       case Primitive::kPrimNot:
2041       case Primitive::kPrimLong:
2042         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043         break;
2044 
2045       case Primitive::kPrimFloat:
2046       case Primitive::kPrimDouble:
2047         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048                   XMM0);
2049         break;
2050 
2051       default:
2052         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053     }
2054   }
2055   codegen_->GenerateFrameExit();
2056 }
2057 
2058 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059   switch (type) {
2060     case Primitive::kPrimBoolean:
2061     case Primitive::kPrimByte:
2062     case Primitive::kPrimChar:
2063     case Primitive::kPrimShort:
2064     case Primitive::kPrimInt:
2065     case Primitive::kPrimNot:
2066     case Primitive::kPrimLong:
2067       return Location::RegisterLocation(RAX);
2068 
2069     case Primitive::kPrimVoid:
2070       return Location::NoLocation();
2071 
2072     case Primitive::kPrimDouble:
2073     case Primitive::kPrimFloat:
2074       return Location::FpuRegisterLocation(XMM0);
2075   }
2076 
2077   UNREACHABLE();
2078 }
2079 
2080 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081   return Location::RegisterLocation(kMethodRegisterArgument);
2082 }
2083 
2084 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085   switch (type) {
2086     case Primitive::kPrimBoolean:
2087     case Primitive::kPrimByte:
2088     case Primitive::kPrimChar:
2089     case Primitive::kPrimShort:
2090     case Primitive::kPrimInt:
2091     case Primitive::kPrimNot: {
2092       uint32_t index = gp_index_++;
2093       stack_index_++;
2094       if (index < calling_convention.GetNumberOfRegisters()) {
2095         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096       } else {
2097         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098       }
2099     }
2100 
2101     case Primitive::kPrimLong: {
2102       uint32_t index = gp_index_;
2103       stack_index_ += 2;
2104       if (index < calling_convention.GetNumberOfRegisters()) {
2105         gp_index_ += 1;
2106         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107       } else {
2108         gp_index_ += 2;
2109         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110       }
2111     }
2112 
2113     case Primitive::kPrimFloat: {
2114       uint32_t index = float_index_++;
2115       stack_index_++;
2116       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118       } else {
2119         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120       }
2121     }
2122 
2123     case Primitive::kPrimDouble: {
2124       uint32_t index = float_index_++;
2125       stack_index_ += 2;
2126       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128       } else {
2129         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130       }
2131     }
2132 
2133     case Primitive::kPrimVoid:
2134       LOG(FATAL) << "Unexpected parameter type " << type;
2135       break;
2136   }
2137   return Location::NoLocation();
2138 }
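// Illustrative mapping (assumed managed-ABI register order): for a method taking
// (int, long, float, double, Object), the visitor hands out RSI, RDX, XMM0, XMM1
// and RCX in turn, and falls back to stack slots once the GP or FPU argument
// registers are exhausted; RDI is reserved for the ArtMethod*
// (kMethodRegisterArgument).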
2139 
2140 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141   // The trampoline uses the same calling convention as the dex calling convention,
2142   // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will
2143   // contain the method_idx.
2144   HandleInvoke(invoke);
2145 }
2146 
2147 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149 }
2150 
2151 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152   // Explicit clinit checks triggered by static invokes must have been pruned by
2153   // art::PrepareForRegisterAllocation.
2154   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155 
2156   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157   if (intrinsic.TryDispatch(invoke)) {
2158     return;
2159   }
2160 
2161   HandleInvoke(invoke);
2162 }
2163 
2164 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165   if (invoke->GetLocations()->Intrinsified()) {
2166     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167     intrinsic.Dispatch(invoke);
2168     return true;
2169   }
2170   return false;
2171 }
2172 
2173 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174   // Explicit clinit checks triggered by static invokes must have been pruned by
2175   // art::PrepareForRegisterAllocation.
2176   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177 
2178   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179     return;
2180   }
2181 
2182   LocationSummary* locations = invoke->GetLocations();
2183   codegen_->GenerateStaticOrDirectCall(
2184       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186 }
2187 
2188 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191 }
2192 
2193 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195   if (intrinsic.TryDispatch(invoke)) {
2196     return;
2197   }
2198 
2199   HandleInvoke(invoke);
2200 }
2201 
2202 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204     return;
2205   }
2206 
2207   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208   DCHECK(!codegen_->IsLeafMethod());
2209   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210 }
2211 
2212 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213   HandleInvoke(invoke);
2214   // Add the hidden argument.
2215   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216 }
2217 
2218 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220   LocationSummary* locations = invoke->GetLocations();
2221   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223   Location receiver = locations->InAt(0);
2224   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2225 
2226   // Set the hidden argument. It is safe to do this here, as RAX
2227   // won't be modified thereafter, before the `call` instruction.
2228   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2229   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2230 
2231   if (receiver.IsStackSlot()) {
2232     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2233     // /* HeapReference<Class> */ temp = temp->klass_
2234     __ movl(temp, Address(temp, class_offset));
2235   } else {
2236     // /* HeapReference<Class> */ temp = receiver->klass_
2237     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2238   }
2239   codegen_->MaybeRecordImplicitNullCheck(invoke);
2240   // Instead of simply (possibly) unpoisoning `temp` here, we should
2241   // emit a read barrier for the previous class reference load.
2242   // However this is not required in practice, as this is an
2243   // intermediate/temporary reference and because the current
2244   // concurrent copying collector keeps the from-space memory
2245   // intact/accessible until the end of the marking phase (a future
2246   // concurrent copying collector may not preserve this guarantee).
2247   __ MaybeUnpoisonHeapReference(temp);
2248   // temp = temp->GetAddressOfIMT()
2249   __ movq(temp,
2250       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2251   // Compute the offset of the IMT entry for this method.
2252   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2253       invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
2254   // temp = temp->GetImtEntryAt(method_offset);
2255   __ movq(temp, Address(temp, method_offset));
2256   // call temp->GetEntryPoint();
2257   __ call(Address(temp,
2258                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2259 
2260   DCHECK(!codegen_->IsLeafMethod());
2261   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2262 }
2263 
2264 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2265   LocationSummary* locations =
2266       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2267   switch (neg->GetResultType()) {
2268     case Primitive::kPrimInt:
2269     case Primitive::kPrimLong:
2270       locations->SetInAt(0, Location::RequiresRegister());
2271       locations->SetOut(Location::SameAsFirstInput());
2272       break;
2273 
2274     case Primitive::kPrimFloat:
2275     case Primitive::kPrimDouble:
2276       locations->SetInAt(0, Location::RequiresFpuRegister());
2277       locations->SetOut(Location::SameAsFirstInput());
2278       locations->AddTemp(Location::RequiresFpuRegister());
2279       break;
2280 
2281     default:
2282       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2283   }
2284 }
2285 
2286 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2287   LocationSummary* locations = neg->GetLocations();
2288   Location out = locations->Out();
2289   Location in = locations->InAt(0);
2290   switch (neg->GetResultType()) {
2291     case Primitive::kPrimInt:
2292       DCHECK(in.IsRegister());
2293       DCHECK(in.Equals(out));
2294       __ negl(out.AsRegister<CpuRegister>());
2295       break;
2296 
2297     case Primitive::kPrimLong:
2298       DCHECK(in.IsRegister());
2299       DCHECK(in.Equals(out));
2300       __ negq(out.AsRegister<CpuRegister>());
2301       break;
2302 
2303     case Primitive::kPrimFloat: {
2304       DCHECK(in.Equals(out));
2305       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2306       // Implement float negation with an exclusive or with value
2307       // 0x80000000 (mask for bit 31, representing the sign of a
2308       // single-precision floating-point number).
2309       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2310       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2311       break;
2312     }
2313 
2314     case Primitive::kPrimDouble: {
2315       DCHECK(in.Equals(out));
2316       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2317       // Implement double negation with an exclusive or with value
2318       // 0x8000000000000000 (mask for bit 63, representing the sign of
2319       // a double-precision floating-point number).
2320       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2321       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2322       break;
2323     }
2324 
2325     default:
2326       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2327   }
2328 }
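// Illustrative sketch of the FP negation above (x86-64 has no FP negate
// instruction); register and literal names assumed:
//     movss  .Lfloat_sign_mask(%rip), %xmm1   // 0x80000000
//     xorps  %xmm1, %xmm0                     // flip the sign bit of the float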
2329 
2330 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2331   LocationSummary* locations =
2332       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2333   Primitive::Type result_type = conversion->GetResultType();
2334   Primitive::Type input_type = conversion->GetInputType();
2335   DCHECK_NE(result_type, input_type);
2336 
2337   // The Java language does not allow treating boolean as an integral type but
2338   // our bit representation makes it safe.
2339 
2340   switch (result_type) {
2341     case Primitive::kPrimByte:
2342       switch (input_type) {
2343         case Primitive::kPrimLong:
2344           // Type conversion from long to byte is a result of code transformations.
2345         case Primitive::kPrimBoolean:
2346           // Boolean input is a result of code transformations.
2347         case Primitive::kPrimShort:
2348         case Primitive::kPrimInt:
2349         case Primitive::kPrimChar:
2350           // Processing a Dex `int-to-byte' instruction.
2351           locations->SetInAt(0, Location::Any());
2352           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2353           break;
2354 
2355         default:
2356           LOG(FATAL) << "Unexpected type conversion from " << input_type
2357                      << " to " << result_type;
2358       }
2359       break;
2360 
2361     case Primitive::kPrimShort:
2362       switch (input_type) {
2363         case Primitive::kPrimLong:
2364           // Type conversion from long to short is a result of code transformations.
2365         case Primitive::kPrimBoolean:
2366           // Boolean input is a result of code transformations.
2367         case Primitive::kPrimByte:
2368         case Primitive::kPrimInt:
2369         case Primitive::kPrimChar:
2370           // Processing a Dex `int-to-short' instruction.
2371           locations->SetInAt(0, Location::Any());
2372           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2373           break;
2374 
2375         default:
2376           LOG(FATAL) << "Unexpected type conversion from " << input_type
2377                      << " to " << result_type;
2378       }
2379       break;
2380 
2381     case Primitive::kPrimInt:
2382       switch (input_type) {
2383         case Primitive::kPrimLong:
2384           // Processing a Dex `long-to-int' instruction.
2385           locations->SetInAt(0, Location::Any());
2386           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2387           break;
2388 
2389         case Primitive::kPrimFloat:
2390           // Processing a Dex `float-to-int' instruction.
2391           locations->SetInAt(0, Location::RequiresFpuRegister());
2392           locations->SetOut(Location::RequiresRegister());
2393           break;
2394 
2395         case Primitive::kPrimDouble:
2396           // Processing a Dex `double-to-int' instruction.
2397           locations->SetInAt(0, Location::RequiresFpuRegister());
2398           locations->SetOut(Location::RequiresRegister());
2399           break;
2400 
2401         default:
2402           LOG(FATAL) << "Unexpected type conversion from " << input_type
2403                      << " to " << result_type;
2404       }
2405       break;
2406 
2407     case Primitive::kPrimLong:
2408       switch (input_type) {
2409         case Primitive::kPrimBoolean:
2410           // Boolean input is a result of code transformations.
2411         case Primitive::kPrimByte:
2412         case Primitive::kPrimShort:
2413         case Primitive::kPrimInt:
2414         case Primitive::kPrimChar:
2415           // Processing a Dex `int-to-long' instruction.
2416           // TODO: We would benefit from a (to-be-implemented)
2417           // Location::RegisterOrStackSlot requirement for this input.
2418           locations->SetInAt(0, Location::RequiresRegister());
2419           locations->SetOut(Location::RequiresRegister());
2420           break;
2421 
2422         case Primitive::kPrimFloat:
2423           // Processing a Dex `float-to-long' instruction.
2424           locations->SetInAt(0, Location::RequiresFpuRegister());
2425           locations->SetOut(Location::RequiresRegister());
2426           break;
2427 
2428         case Primitive::kPrimDouble:
2429           // Processing a Dex `double-to-long' instruction.
2430           locations->SetInAt(0, Location::RequiresFpuRegister());
2431           locations->SetOut(Location::RequiresRegister());
2432           break;
2433 
2434         default:
2435           LOG(FATAL) << "Unexpected type conversion from " << input_type
2436                      << " to " << result_type;
2437       }
2438       break;
2439 
2440     case Primitive::kPrimChar:
2441       switch (input_type) {
2442         case Primitive::kPrimLong:
2443           // Type conversion from long to char is a result of code transformations.
2444         case Primitive::kPrimBoolean:
2445           // Boolean input is a result of code transformations.
2446         case Primitive::kPrimByte:
2447         case Primitive::kPrimShort:
2448         case Primitive::kPrimInt:
2449           // Processing a Dex `int-to-char' instruction.
2450           locations->SetInAt(0, Location::Any());
2451           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2452           break;
2453 
2454         default:
2455           LOG(FATAL) << "Unexpected type conversion from " << input_type
2456                      << " to " << result_type;
2457       }
2458       break;
2459 
2460     case Primitive::kPrimFloat:
2461       switch (input_type) {
2462         case Primitive::kPrimBoolean:
2463           // Boolean input is a result of code transformations.
2464         case Primitive::kPrimByte:
2465         case Primitive::kPrimShort:
2466         case Primitive::kPrimInt:
2467         case Primitive::kPrimChar:
2468           // Processing a Dex `int-to-float' instruction.
2469           locations->SetInAt(0, Location::Any());
2470           locations->SetOut(Location::RequiresFpuRegister());
2471           break;
2472 
2473         case Primitive::kPrimLong:
2474           // Processing a Dex `long-to-float' instruction.
2475           locations->SetInAt(0, Location::Any());
2476           locations->SetOut(Location::RequiresFpuRegister());
2477           break;
2478 
2479         case Primitive::kPrimDouble:
2480           // Processing a Dex `double-to-float' instruction.
2481           locations->SetInAt(0, Location::Any());
2482           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2483           break;
2484 
2485         default:
2486           LOG(FATAL) << "Unexpected type conversion from " << input_type
2487                      << " to " << result_type;
2488       }
2489       break;
2490 
2491     case Primitive::kPrimDouble:
2492       switch (input_type) {
2493         case Primitive::kPrimBoolean:
2494           // Boolean input is a result of code transformations.
2495         case Primitive::kPrimByte:
2496         case Primitive::kPrimShort:
2497         case Primitive::kPrimInt:
2498         case Primitive::kPrimChar:
2499           // Processing a Dex `int-to-double' instruction.
2500           locations->SetInAt(0, Location::Any());
2501           locations->SetOut(Location::RequiresFpuRegister());
2502           break;
2503 
2504         case Primitive::kPrimLong:
2505           // Processing a Dex `long-to-double' instruction.
2506           locations->SetInAt(0, Location::Any());
2507           locations->SetOut(Location::RequiresFpuRegister());
2508           break;
2509 
2510         case Primitive::kPrimFloat:
2511           // Processing a Dex `float-to-double' instruction.
2512           locations->SetInAt(0, Location::Any());
2513           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2514           break;
2515 
2516         default:
2517           LOG(FATAL) << "Unexpected type conversion from " << input_type
2518                      << " to " << result_type;
2519       }
2520       break;
2521 
2522     default:
2523       LOG(FATAL) << "Unexpected type conversion from " << input_type
2524                  << " to " << result_type;
2525   }
2526 }
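
// Note on the constraints above: the integral narrowing conversions accept Location::Any()
// because movsx/movzx/movl can read directly from a stack slot, and a constant input can be
// materialized as an immediate. The float/double to int/long conversions instead require the
// input in an XMM register and the result in a general-purpose register, since the
// clamp-and-truncate sequences emitted in the instruction visitor below work on registers.
// (Illustrative summary of the cases handled here.)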
2527 
2528 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2529   LocationSummary* locations = conversion->GetLocations();
2530   Location out = locations->Out();
2531   Location in = locations->InAt(0);
2532   Primitive::Type result_type = conversion->GetResultType();
2533   Primitive::Type input_type = conversion->GetInputType();
2534   DCHECK_NE(result_type, input_type);
2535   switch (result_type) {
2536     case Primitive::kPrimByte:
2537       switch (input_type) {
2538         case Primitive::kPrimLong:
2539           // Type conversion from long to byte is a result of code transformations.
2540         case Primitive::kPrimBoolean:
2541           // Boolean input is a result of code transformations.
2542         case Primitive::kPrimShort:
2543         case Primitive::kPrimInt:
2544         case Primitive::kPrimChar:
2545           // Processing a Dex `int-to-byte' instruction.
2546           if (in.IsRegister()) {
2547             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2548           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2549             __ movsxb(out.AsRegister<CpuRegister>(),
2550                       Address(CpuRegister(RSP), in.GetStackIndex()));
2551           } else {
2552             __ movl(out.AsRegister<CpuRegister>(),
2553                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2554           }
2555           break;
2556 
2557         default:
2558           LOG(FATAL) << "Unexpected type conversion from " << input_type
2559                      << " to " << result_type;
2560       }
2561       break;
2562 
2563     case Primitive::kPrimShort:
2564       switch (input_type) {
2565         case Primitive::kPrimLong:
2566           // Type conversion from long to short is a result of code transformations.
2567         case Primitive::kPrimBoolean:
2568           // Boolean input is a result of code transformations.
2569         case Primitive::kPrimByte:
2570         case Primitive::kPrimInt:
2571         case Primitive::kPrimChar:
2572           // Processing a Dex `int-to-short' instruction.
2573           if (in.IsRegister()) {
2574             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2575           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2576             __ movsxw(out.AsRegister<CpuRegister>(),
2577                       Address(CpuRegister(RSP), in.GetStackIndex()));
2578           } else {
2579             __ movl(out.AsRegister<CpuRegister>(),
2580                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2581           }
2582           break;
2583 
2584         default:
2585           LOG(FATAL) << "Unexpected type conversion from " << input_type
2586                      << " to " << result_type;
2587       }
2588       break;
2589 
2590     case Primitive::kPrimInt:
2591       switch (input_type) {
2592         case Primitive::kPrimLong:
2593           // Processing a Dex `long-to-int' instruction.
2594           if (in.IsRegister()) {
2595             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2596           } else if (in.IsDoubleStackSlot()) {
2597             __ movl(out.AsRegister<CpuRegister>(),
2598                     Address(CpuRegister(RSP), in.GetStackIndex()));
2599           } else {
2600             DCHECK(in.IsConstant());
2601             DCHECK(in.GetConstant()->IsLongConstant());
2602             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2603             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2604           }
2605           break;
2606 
2607         case Primitive::kPrimFloat: {
2608           // Processing a Dex `float-to-int' instruction.
2609           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2610           CpuRegister output = out.AsRegister<CpuRegister>();
2611           NearLabel done, nan;
2612 
2613           __ movl(output, Immediate(kPrimIntMax));
2614           // if input >= (float)INT_MAX goto done
2615           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2616           __ j(kAboveEqual, &done);
2617           // if input == NaN goto nan
2618           __ j(kUnordered, &nan);
2619           // output = float-to-int-truncate(input)
2620           __ cvttss2si(output, input, false);
2621           __ jmp(&done);
2622           __ Bind(&nan);
2623           //  output = 0
2624           __ xorl(output, output);
2625           __ Bind(&done);
2626           break;
2627         }
2628 
2629         case Primitive::kPrimDouble: {
2630           // Processing a Dex `double-to-int' instruction.
2631           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2632           CpuRegister output = out.AsRegister<CpuRegister>();
2633           NearLabel done, nan;
2634 
2635           __ movl(output, Immediate(kPrimIntMax));
2636           // if input >= (double)INT_MAX goto done
2637           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2638           __ j(kAboveEqual, &done);
2639           // if input == NaN goto nan
2640           __ j(kUnordered, &nan);
2641           // output = double-to-int-truncate(input)
2642           __ cvttsd2si(output, input);
2643           __ jmp(&done);
2644           __ Bind(&nan);
2645           //  output = 0
2646           __ xorl(output, output);
2647           __ Bind(&done);
2648           break;
2649         }
2650 
2651         default:
2652           LOG(FATAL) << "Unexpected type conversion from " << input_type
2653                      << " to " << result_type;
2654       }
2655       break;
2656 
2657     case Primitive::kPrimLong:
2658       DCHECK(out.IsRegister());
2659       switch (input_type) {
2660         case Primitive::kPrimBoolean:
2661           // Boolean input is a result of code transformations.
2662         case Primitive::kPrimByte:
2663         case Primitive::kPrimShort:
2664         case Primitive::kPrimInt:
2665         case Primitive::kPrimChar:
2666           // Processing a Dex `int-to-long' instruction.
2667           DCHECK(in.IsRegister());
2668           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2669           break;
2670 
2671         case Primitive::kPrimFloat: {
2672           // Processing a Dex `float-to-long' instruction.
2673           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2674           CpuRegister output = out.AsRegister<CpuRegister>();
2675           NearLabel done, nan;
2676 
2677           codegen_->Load64BitValue(output, kPrimLongMax);
2678           // if input >= (float)LONG_MAX goto done
2679           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2680           __ j(kAboveEqual, &done);
2681           // if input == NaN goto nan
2682           __ j(kUnordered, &nan);
2683           // output = float-to-long-truncate(input)
2684           __ cvttss2si(output, input, true);
2685           __ jmp(&done);
2686           __ Bind(&nan);
2687           //  output = 0
2688           __ xorl(output, output);
2689           __ Bind(&done);
2690           break;
2691         }
2692 
2693         case Primitive::kPrimDouble: {
2694           // Processing a Dex `double-to-long' instruction.
2695           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2696           CpuRegister output = out.AsRegister<CpuRegister>();
2697           NearLabel done, nan;
2698 
2699           codegen_->Load64BitValue(output, kPrimLongMax);
2700           // if input >= (double)LONG_MAX goto done
2701           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2702           __ j(kAboveEqual, &done);
2703           // if input == NaN goto nan
2704           __ j(kUnordered, &nan);
2705           // output = double-to-long-truncate(input)
2706           __ cvttsd2si(output, input, true);
2707           __ jmp(&done);
2708           __ Bind(&nan);
2709           //  output = 0
2710           __ xorl(output, output);
2711           __ Bind(&done);
2712           break;
2713         }
2714 
2715         default:
2716           LOG(FATAL) << "Unexpected type conversion from " << input_type
2717                      << " to " << result_type;
2718       }
2719       break;
2720 
2721     case Primitive::kPrimChar:
2722       switch (input_type) {
2723         case Primitive::kPrimLong:
2724           // Type conversion from long to char is a result of code transformations.
2725         case Primitive::kPrimBoolean:
2726           // Boolean input is a result of code transformations.
2727         case Primitive::kPrimByte:
2728         case Primitive::kPrimShort:
2729         case Primitive::kPrimInt:
2730           // Processing a Dex `int-to-char' instruction.
2731           if (in.IsRegister()) {
2732             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2733           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2734             __ movzxw(out.AsRegister<CpuRegister>(),
2735                       Address(CpuRegister(RSP), in.GetStackIndex()));
2736           } else {
2737             __ movl(out.AsRegister<CpuRegister>(),
2738                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2739           }
2740           break;
2741 
2742         default:
2743           LOG(FATAL) << "Unexpected type conversion from " << input_type
2744                      << " to " << result_type;
2745       }
2746       break;
2747 
2748     case Primitive::kPrimFloat:
2749       switch (input_type) {
2750         case Primitive::kPrimBoolean:
2751           // Boolean input is a result of code transformations.
2752         case Primitive::kPrimByte:
2753         case Primitive::kPrimShort:
2754         case Primitive::kPrimInt:
2755         case Primitive::kPrimChar:
2756           // Processing a Dex `int-to-float' instruction.
2757           if (in.IsRegister()) {
2758             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2759           } else if (in.IsConstant()) {
2760             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2761             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2762             codegen_->Load32BitValue(dest, static_cast<float>(v));
2763           } else {
2764             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2765                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2766           }
2767           break;
2768 
2769         case Primitive::kPrimLong:
2770           // Processing a Dex `long-to-float' instruction.
2771           if (in.IsRegister()) {
2772             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2773           } else if (in.IsConstant()) {
2774             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2775             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2776             codegen_->Load32BitValue(dest, static_cast<float>(v));
2777           } else {
2778             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2779                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2780           }
2781           break;
2782 
2783         case Primitive::kPrimDouble:
2784           // Processing a Dex `double-to-float' instruction.
2785           if (in.IsFpuRegister()) {
2786             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2787           } else if (in.IsConstant()) {
2788             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2789             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2790             codegen_->Load32BitValue(dest, static_cast<float>(v));
2791           } else {
2792             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2793                         Address(CpuRegister(RSP), in.GetStackIndex()));
2794           }
2795           break;
2796 
2797         default:
2798           LOG(FATAL) << "Unexpected type conversion from " << input_type
2799                      << " to " << result_type;
2800       }
2801       break;
2802 
2803     case Primitive::kPrimDouble:
2804       switch (input_type) {
2805         case Primitive::kPrimBoolean:
2806           // Boolean input is a result of code transformations.
2807         case Primitive::kPrimByte:
2808         case Primitive::kPrimShort:
2809         case Primitive::kPrimInt:
2810         case Primitive::kPrimChar:
2811           // Processing a Dex `int-to-double' instruction.
2812           if (in.IsRegister()) {
2813             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2814           } else if (in.IsConstant()) {
2815             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2816             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2817             codegen_->Load64BitValue(dest, static_cast<double>(v));
2818           } else {
2819             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2820                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2821           }
2822           break;
2823 
2824         case Primitive::kPrimLong:
2825           // Processing a Dex `long-to-double' instruction.
2826           if (in.IsRegister()) {
2827             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2828           } else if (in.IsConstant()) {
2829             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2830             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2831             codegen_->Load64BitValue(dest, static_cast<double>(v));
2832           } else {
2833             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2834                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2835           }
2836           break;
2837 
2838         case Primitive::kPrimFloat:
2839           // Processing a Dex `float-to-double' instruction.
2840           if (in.IsFpuRegister()) {
2841             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2842           } else if (in.IsConstant()) {
2843             float v = in.GetConstant()->AsFloatConstant()->GetValue();
2844             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2845             codegen_->Load64BitValue(dest, static_cast<double>(v));
2846           } else {
2847             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2848                         Address(CpuRegister(RSP), in.GetStackIndex()));
2849           }
2850           break;
2851 
2852         default:
2853           LOG(FATAL) << "Unexpected type conversion from " << input_type
2854                      << " to " << result_type;
2855       }
2856       break;
2857 
2858     default:
2859       LOG(FATAL) << "Unexpected type conversion from " << input_type
2860                  << " to " << result_type;
2861   }
2862 }
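
// Illustrative note on the fp-to-integral sequences above (Java semantics, summarized):
//   (int) NaN    == 0
//   (int) 1e30f  == Integer.MAX_VALUE   (too-large values clamp to MAX_VALUE)
//   (int) -1e30f == Integer.MIN_VALUE   (too-small values clamp to MIN_VALUE)
// cvttss2si/cvttsd2si already return 0x80000000 / 0x8000000000000000 (i.e. MIN_VALUE) for NaN
// and for any out-of-range input, so only two cases need fixing up: NaN is forced to 0 by the
// xor at the `nan' label, and inputs >= MAX_VALUE keep the MAX_VALUE pre-loaded into the output
// thanks to the kAboveEqual branch.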
2863 
2864 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2865   LocationSummary* locations =
2866       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2867   switch (add->GetResultType()) {
2868     case Primitive::kPrimInt: {
2869       locations->SetInAt(0, Location::RequiresRegister());
2870       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2871       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2872       break;
2873     }
2874 
2875     case Primitive::kPrimLong: {
2876       locations->SetInAt(0, Location::RequiresRegister());
2877       // We can use a leaq or addq if the constant can fit in an immediate.
2878       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2879       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2880       break;
2881     }
2882 
2883     case Primitive::kPrimDouble:
2884     case Primitive::kPrimFloat: {
2885       locations->SetInAt(0, Location::RequiresFpuRegister());
2886       locations->SetInAt(1, Location::Any());
2887       locations->SetOut(Location::SameAsFirstInput());
2888       break;
2889     }
2890 
2891     default:
2892       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2893   }
2894 }
2895 
2896 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2897   LocationSummary* locations = add->GetLocations();
2898   Location first = locations->InAt(0);
2899   Location second = locations->InAt(1);
2900   Location out = locations->Out();
2901 
2902   switch (add->GetResultType()) {
2903     case Primitive::kPrimInt: {
2904       if (second.IsRegister()) {
2905         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2906           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2907         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2908           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2909         } else {
2910           __ leal(out.AsRegister<CpuRegister>(), Address(
2911               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2912         }
2913       } else if (second.IsConstant()) {
2914         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2915           __ addl(out.AsRegister<CpuRegister>(),
2916                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2917         } else {
2918           __ leal(out.AsRegister<CpuRegister>(), Address(
2919               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2920         }
2921       } else {
2922         DCHECK(first.Equals(locations->Out()));
2923         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2924       }
2925       break;
2926     }
2927 
2928     case Primitive::kPrimLong: {
2929       if (second.IsRegister()) {
2930         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2931           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2932         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2933           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2934         } else {
2935           __ leaq(out.AsRegister<CpuRegister>(), Address(
2936               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2937         }
2938       } else {
2939         DCHECK(second.IsConstant());
2940         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2941         int32_t int32_value = Low32Bits(value);
2942         DCHECK_EQ(int32_value, value);
2943         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2944           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2945         } else {
2946           __ leaq(out.AsRegister<CpuRegister>(), Address(
2947               first.AsRegister<CpuRegister>(), int32_value));
2948         }
2949       }
2950       break;
2951     }
2952 
2953     case Primitive::kPrimFloat: {
2954       if (second.IsFpuRegister()) {
2955         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2956       } else if (second.IsConstant()) {
2957         __ addss(first.AsFpuRegister<XmmRegister>(),
2958                  codegen_->LiteralFloatAddress(
2959                      second.GetConstant()->AsFloatConstant()->GetValue()));
2960       } else {
2961         DCHECK(second.IsStackSlot());
2962         __ addss(first.AsFpuRegister<XmmRegister>(),
2963                  Address(CpuRegister(RSP), second.GetStackIndex()));
2964       }
2965       break;
2966     }
2967 
2968     case Primitive::kPrimDouble: {
2969       if (second.IsFpuRegister()) {
2970         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2971       } else if (second.IsConstant()) {
2972         __ addsd(first.AsFpuRegister<XmmRegister>(),
2973                  codegen_->LiteralDoubleAddress(
2974                      second.GetConstant()->AsDoubleConstant()->GetValue()));
2975       } else {
2976         DCHECK(second.IsDoubleStackSlot());
2977         __ addsd(first.AsFpuRegister<XmmRegister>(),
2978                  Address(CpuRegister(RSP), second.GetStackIndex()));
2979       }
2980       break;
2981     }
2982 
2983     default:
2984       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2985   }
2986 }
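
// Illustrative note: when the register allocator picks a destination different from both inputs,
// the integer cases above fall back to lea, which acts as a non-destructive three-operand add.
// Roughly:
//   leal out, [first + second * 1]   // out = first + second, both inputs left untouched
//   leal out, [first + imm]          // out = first + constant
// Unlike add, lea does not update the flags, which is fine here because nothing consumes flags
// produced by an HAdd.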
2987 
2988 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2989   LocationSummary* locations =
2990       new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2991   switch (sub->GetResultType()) {
2992     case Primitive::kPrimInt: {
2993       locations->SetInAt(0, Location::RequiresRegister());
2994       locations->SetInAt(1, Location::Any());
2995       locations->SetOut(Location::SameAsFirstInput());
2996       break;
2997     }
2998     case Primitive::kPrimLong: {
2999       locations->SetInAt(0, Location::RequiresRegister());
3000       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3001       locations->SetOut(Location::SameAsFirstInput());
3002       break;
3003     }
3004     case Primitive::kPrimFloat:
3005     case Primitive::kPrimDouble: {
3006       locations->SetInAt(0, Location::RequiresFpuRegister());
3007       locations->SetInAt(1, Location::Any());
3008       locations->SetOut(Location::SameAsFirstInput());
3009       break;
3010     }
3011     default:
3012       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3013   }
3014 }
3015 
3016 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3017   LocationSummary* locations = sub->GetLocations();
3018   Location first = locations->InAt(0);
3019   Location second = locations->InAt(1);
3020   DCHECK(first.Equals(locations->Out()));
3021   switch (sub->GetResultType()) {
3022     case Primitive::kPrimInt: {
3023       if (second.IsRegister()) {
3024         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3025       } else if (second.IsConstant()) {
3026         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3027         __ subl(first.AsRegister<CpuRegister>(), imm);
3028       } else {
3029         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3030       }
3031       break;
3032     }
3033     case Primitive::kPrimLong: {
3034       if (second.IsConstant()) {
3035         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3036         DCHECK(IsInt<32>(value));
3037         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3038       } else {
3039         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3040       }
3041       break;
3042     }
3043 
3044     case Primitive::kPrimFloat: {
3045       if (second.IsFpuRegister()) {
3046         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3047       } else if (second.IsConstant()) {
3048         __ subss(first.AsFpuRegister<XmmRegister>(),
3049                  codegen_->LiteralFloatAddress(
3050                      second.GetConstant()->AsFloatConstant()->GetValue()));
3051       } else {
3052         DCHECK(second.IsStackSlot());
3053         __ subss(first.AsFpuRegister<XmmRegister>(),
3054                  Address(CpuRegister(RSP), second.GetStackIndex()));
3055       }
3056       break;
3057     }
3058 
3059     case Primitive::kPrimDouble: {
3060       if (second.IsFpuRegister()) {
3061         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3062       } else if (second.IsConstant()) {
3063         __ subsd(first.AsFpuRegister<XmmRegister>(),
3064                  codegen_->LiteralDoubleAddress(
3065                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3066       } else {
3067         DCHECK(second.IsDoubleStackSlot());
3068         __ subsd(first.AsFpuRegister<XmmRegister>(),
3069                  Address(CpuRegister(RSP), second.GetStackIndex()));
3070       }
3071       break;
3072     }
3073 
3074     default:
3075       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3076   }
3077 }
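
// Note: unlike addition, there is no lea-style three-operand form for subtraction (the second
// operand would have to be negated first), so the sub cases above always require the output to
// alias the first input (SameAsFirstInput); subl first, second computes first = first - second.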
3078 
3079 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3080   LocationSummary* locations =
3081       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3082   switch (mul->GetResultType()) {
3083     case Primitive::kPrimInt: {
3084       locations->SetInAt(0, Location::RequiresRegister());
3085       locations->SetInAt(1, Location::Any());
3086       if (mul->InputAt(1)->IsIntConstant()) {
3087         // Can use 3 operand multiply.
3088         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3089       } else {
3090         locations->SetOut(Location::SameAsFirstInput());
3091       }
3092       break;
3093     }
3094     case Primitive::kPrimLong: {
3095       locations->SetInAt(0, Location::RequiresRegister());
3096       locations->SetInAt(1, Location::Any());
3097       if (mul->InputAt(1)->IsLongConstant() &&
3098           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3099         // Can use 3 operand multiply.
3100         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3101       } else {
3102         locations->SetOut(Location::SameAsFirstInput());
3103       }
3104       break;
3105     }
3106     case Primitive::kPrimFloat:
3107     case Primitive::kPrimDouble: {
3108       locations->SetInAt(0, Location::RequiresFpuRegister());
3109       locations->SetInAt(1, Location::Any());
3110       locations->SetOut(Location::SameAsFirstInput());
3111       break;
3112     }
3113 
3114     default:
3115       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3116   }
3117 }
3118 
3119 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3120   LocationSummary* locations = mul->GetLocations();
3121   Location first = locations->InAt(0);
3122   Location second = locations->InAt(1);
3123   Location out = locations->Out();
3124   switch (mul->GetResultType()) {
3125     case Primitive::kPrimInt:
3126       // The constant may have ended up in a register, so test explicitly to avoid
3127       // problems where the output may not be the same as the first operand.
3128       if (mul->InputAt(1)->IsIntConstant()) {
3129         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3130         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3131       } else if (second.IsRegister()) {
3132         DCHECK(first.Equals(out));
3133         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3134       } else {
3135         DCHECK(first.Equals(out));
3136         DCHECK(second.IsStackSlot());
3137         __ imull(first.AsRegister<CpuRegister>(),
3138                  Address(CpuRegister(RSP), second.GetStackIndex()));
3139       }
3140       break;
3141     case Primitive::kPrimLong: {
3142       // The constant may have ended up in a register, so test explicitly to avoid
3143       // problems where the output may not be the same as the first operand.
3144       if (mul->InputAt(1)->IsLongConstant()) {
3145         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3146         if (IsInt<32>(value)) {
3147           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3148                    Immediate(static_cast<int32_t>(value)));
3149         } else {
3150           // Have to use the constant area.
3151           DCHECK(first.Equals(out));
3152           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3153         }
3154       } else if (second.IsRegister()) {
3155         DCHECK(first.Equals(out));
3156         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3157       } else {
3158         DCHECK(second.IsDoubleStackSlot());
3159         DCHECK(first.Equals(out));
3160         __ imulq(first.AsRegister<CpuRegister>(),
3161                  Address(CpuRegister(RSP), second.GetStackIndex()));
3162       }
3163       break;
3164     }
3165 
3166     case Primitive::kPrimFloat: {
3167       DCHECK(first.Equals(out));
3168       if (second.IsFpuRegister()) {
3169         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3170       } else if (second.IsConstant()) {
3171         __ mulss(first.AsFpuRegister<XmmRegister>(),
3172                  codegen_->LiteralFloatAddress(
3173                      second.GetConstant()->AsFloatConstant()->GetValue()));
3174       } else {
3175         DCHECK(second.IsStackSlot());
3176         __ mulss(first.AsFpuRegister<XmmRegister>(),
3177                  Address(CpuRegister(RSP), second.GetStackIndex()));
3178       }
3179       break;
3180     }
3181 
3182     case Primitive::kPrimDouble: {
3183       DCHECK(first.Equals(out));
3184       if (second.IsFpuRegister()) {
3185         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3186       } else if (second.IsConstant()) {
3187         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3188                  codegen_->LiteralDoubleAddress(
3189                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3190       } else {
3191         DCHECK(second.IsDoubleStackSlot());
3192         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3193                  Address(CpuRegister(RSP), second.GetStackIndex()));
3194       }
3195       break;
3196     }
3197 
3198     default:
3199       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3200   }
3201 }
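
// Illustrative note: imul has a true three-operand form when one operand is an immediate, e.g.
//   imull out, first, Immediate(10)   // out = first * 10, `first' is preserved
// which is why the location builder above only drops the SameAsFirstInput requirement when the
// second input is a constant (for longs, one that fits in 32 bits). Register and memory operands
// use the two-operand form instead: imull first, second computes first *= second.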
3202 
3203 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3204                                                      uint32_t stack_adjustment, bool is_float) {
3205   if (source.IsStackSlot()) {
3206     DCHECK(is_float);
3207     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3208   } else if (source.IsDoubleStackSlot()) {
3209     DCHECK(!is_float);
3210     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3211   } else {
3212     // Write the value to the temporary location on the stack and load to FP stack.
3213     if (is_float) {
3214       Location stack_temp = Location::StackSlot(temp_offset);
3215       codegen_->Move(stack_temp, source);
3216       __ flds(Address(CpuRegister(RSP), temp_offset));
3217     } else {
3218       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3219       codegen_->Move(stack_temp, source);
3220       __ fldl(Address(CpuRegister(RSP), temp_offset));
3221     }
3222   }
3223 }
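
// Note: the fprem loop in GenerateRemFP works on the legacy x87 stack, so operands must be loaded
// with flds/fldl from memory; there is no direct XMM-to-x87 move. Values currently held in an XMM
// register (or constants) are therefore spilled to the stack temporary provided by the caller and
// loaded from there.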
3224 
3225 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
3226   Primitive::Type type = rem->GetResultType();
3227   bool is_float = type == Primitive::kPrimFloat;
3228   size_t elem_size = Primitive::ComponentSize(type);
3229   LocationSummary* locations = rem->GetLocations();
3230   Location first = locations->InAt(0);
3231   Location second = locations->InAt(1);
3232   Location out = locations->Out();
3233 
3234   // Create stack space for 2 elements.
3235   // TODO: enhance register allocator to ask for stack temporaries.
3236   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3237 
3238   // Load the values to the FP stack in reverse order, using temporaries if needed.
3239   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3240   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3241 
3242   // Loop doing FPREM until we stabilize.
3243   NearLabel retry;
3244   __ Bind(&retry);
3245   __ fprem();
3246 
3247   // Move FP status to AX.
3248   __ fstsw();
3249 
3250   // And see if the argument reduction is complete. This is signaled by the
3251   // C2 FPU flag bit set to 0.
3252   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3253   __ j(kNotEqual, &retry);
3254 
3255   // We have settled on the final value. Retrieve it into an XMM register.
3256   // Store FP top of stack to real stack.
3257   if (is_float) {
3258     __ fsts(Address(CpuRegister(RSP), 0));
3259   } else {
3260     __ fstl(Address(CpuRegister(RSP), 0));
3261   }
3262 
3263   // Pop the 2 items from the FP stack.
3264   __ fucompp();
3265 
3266   // Load the value from the stack into an XMM register.
3267   DCHECK(out.IsFpuRegister()) << out;
3268   if (is_float) {
3269     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3270   } else {
3271     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3272   }
3273 
3274   // And remove the temporary stack space we allocated.
3275   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3276 }
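
// Illustrative sketch of the sequence above, for a floating-point remainder a % b:
//   st(1) = b, st(0) = a                // pushed in reverse order, divisor first
//   do { fprem; } while (C2 != 0);      // fprem produces a *partial* remainder and sets the C2
//                                       // status bit (bit 10, kC2ConditionMask == 0x400) while
//                                       // further reduction steps are still needed
//   fsts/fstl -> [rsp]                  // store st(0), the final remainder
//   fucompp                             // pop both x87 operands
//   movss/movsd xmm, [rsp]              // reload the result into the XMM output register
// This yields fmod-style semantics (the sign of the result follows the dividend), matching Java.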
3277 
3278 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3279   DCHECK(instruction->IsDiv() || instruction->IsRem());
3280 
3281   LocationSummary* locations = instruction->GetLocations();
3282   Location second = locations->InAt(1);
3283   DCHECK(second.IsConstant());
3284 
3285   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3286   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3287   int64_t imm = Int64FromConstant(second.GetConstant());
3288 
3289   DCHECK(imm == 1 || imm == -1);
3290 
3291   switch (instruction->GetResultType()) {
3292     case Primitive::kPrimInt: {
3293       if (instruction->IsRem()) {
3294         __ xorl(output_register, output_register);
3295       } else {
3296         __ movl(output_register, input_register);
3297         if (imm == -1) {
3298           __ negl(output_register);
3299         }
3300       }
3301       break;
3302     }
3303 
3304     case Primitive::kPrimLong: {
3305       if (instruction->IsRem()) {
3306         __ xorl(output_register, output_register);
3307       } else {
3308         __ movq(output_register, input_register);
3309         if (imm == -1) {
3310           __ negq(output_register);
3311         }
3312       }
3313       break;
3314     }
3315 
3316     default:
3317       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3318   }
3319 }
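
// Example of the special-casing above: for a constant divisor of 1 or -1 no idiv is needed,
// since x / 1 == x, x / -1 == -x (negl/negq) and x % (+/-)1 == 0 (the xor clears the output).
// Note that negating INT_MIN/LONG_MIN wraps back to itself, which is exactly what Java requires.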
3320 
3321 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3322   LocationSummary* locations = instruction->GetLocations();
3323   Location second = locations->InAt(1);
3324 
3325   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3326   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3327 
3328   int64_t imm = Int64FromConstant(second.GetConstant());
3329   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3330   uint64_t abs_imm = AbsOrMin(imm);
3331 
3332   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3333 
3334   if (instruction->GetResultType() == Primitive::kPrimInt) {
3335     __ leal(tmp, Address(numerator, abs_imm - 1));
3336     __ testl(numerator, numerator);
3337     __ cmov(kGreaterEqual, tmp, numerator);
3338     int shift = CTZ(imm);
3339     __ sarl(tmp, Immediate(shift));
3340 
3341     if (imm < 0) {
3342       __ negl(tmp);
3343     }
3344 
3345     __ movl(output_register, tmp);
3346   } else {
3347     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3348     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3349 
3350     codegen_->Load64BitValue(rdx, abs_imm - 1);
3351     __ addq(rdx, numerator);
3352     __ testq(numerator, numerator);
3353     __ cmov(kGreaterEqual, rdx, numerator);
3354     int shift = CTZ(imm);
3355     __ sarq(rdx, Immediate(shift));
3356 
3357     if (imm < 0) {
3358       __ negq(rdx);
3359     }
3360 
3361     __ movq(output_register, rdx);
3362   }
3363 }
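
// Worked example of the power-of-two path above, for an int division by 4 (abs_imm == 4,
// shift == 2):
//   tmp = n + 3              // leal tmp, [n + abs_imm - 1]
//   if (n >= 0) tmp = n      // testl + cmov: the bias is only kept for negative numerators
//   tmp >>= 2                // arithmetic shift right
// e.g. n == -7: tmp = -4 (kept, n < 0), -4 >> 2 == -1 == truncated -7/4;
//      n ==  7: tmp = 7 (cmov), 7 >> 2 == 1 == 7/4.
// A trailing neg is emitted when the divisor itself is negative.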
3364 
3365 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3366   DCHECK(instruction->IsDiv() || instruction->IsRem());
3367 
3368   LocationSummary* locations = instruction->GetLocations();
3369   Location second = locations->InAt(1);
3370 
3371   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3372       : locations->GetTemp(0).AsRegister<CpuRegister>();
3373   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3374   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3375       : locations->Out().AsRegister<CpuRegister>();
3376   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3377 
3378   DCHECK_EQ(RAX, eax.AsRegister());
3379   DCHECK_EQ(RDX, edx.AsRegister());
3380   if (instruction->IsDiv()) {
3381     DCHECK_EQ(RAX, out.AsRegister());
3382   } else {
3383     DCHECK_EQ(RDX, out.AsRegister());
3384   }
3385 
3386   int64_t magic;
3387   int shift;
3388 
3389   // TODO: can these branches be written as one?
3390   if (instruction->GetResultType() == Primitive::kPrimInt) {
3391     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3392 
3393     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3394 
3395     __ movl(numerator, eax);
3396 
3397     NearLabel no_div;
3398     NearLabel end;
3399     __ testl(eax, eax);
3400     __ j(kNotEqual, &no_div);
3401 
3402     __ xorl(out, out);
3403     __ jmp(&end);
3404 
3405     __ Bind(&no_div);
3406 
3407     __ movl(eax, Immediate(magic));
3408     __ imull(numerator);
3409 
3410     if (imm > 0 && magic < 0) {
3411       __ addl(edx, numerator);
3412     } else if (imm < 0 && magic > 0) {
3413       __ subl(edx, numerator);
3414     }
3415 
3416     if (shift != 0) {
3417       __ sarl(edx, Immediate(shift));
3418     }
3419 
3420     __ movl(eax, edx);
3421     __ shrl(edx, Immediate(31));
3422     __ addl(edx, eax);
3423 
3424     if (instruction->IsRem()) {
3425       __ movl(eax, numerator);
3426       __ imull(edx, Immediate(imm));
3427       __ subl(eax, edx);
3428       __ movl(edx, eax);
3429     } else {
3430       __ movl(eax, edx);
3431     }
3432     __ Bind(&end);
3433   } else {
3434     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3435 
3436     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3437 
3438     CpuRegister rax = eax;
3439     CpuRegister rdx = edx;
3440 
3441     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3442 
3443     // Save the numerator.
3444     __ movq(numerator, rax);
3445 
3446     // RAX = magic
3447     codegen_->Load64BitValue(rax, magic);
3448 
3449     // RDX:RAX = magic * numerator
3450     __ imulq(numerator);
3451 
3452     if (imm > 0 && magic < 0) {
3453       // RDX += numerator
3454       __ addq(rdx, numerator);
3455     } else if (imm < 0 && magic > 0) {
3456       // RDX -= numerator
3457       __ subq(rdx, numerator);
3458     }
3459 
3460     // Shift if needed.
3461     if (shift != 0) {
3462       __ sarq(rdx, Immediate(shift));
3463     }
3464 
3465     // RDX += 1 if RDX < 0
3466     __ movq(rax, rdx);
3467     __ shrq(rdx, Immediate(63));
3468     __ addq(rdx, rax);
3469 
3470     if (instruction->IsRem()) {
3471       __ movq(rax, numerator);
3472 
3473       if (IsInt<32>(imm)) {
3474         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3475       } else {
3476         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3477       }
3478 
3479       __ subq(rax, rdx);
3480       __ movq(rdx, rax);
3481     } else {
3482       __ movq(rax, rdx);
3483     }
3484   }
3485 }
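
// Worked example of the "magic number" division above (constants come from
// CalculateMagicAndShiftForDivRem, following the usual Hacker's Delight construction): for a
// 32-bit division by 3, magic == 0x55555556 and shift == 0, so for a numerator of 7:
//   edx = hi32(0x55555556 * 7) == 2   // imull leaves the high half of the product in edx
//   (no +/- numerator correction, since both imm and magic are positive)
//   edx >>= 0                         // shift == 0, nothing to do
//   q = edx + (edx >>> 31) == 2       // add 1 only if the intermediate result was negative
// which is indeed 7 / 3. For a remainder, q is multiplied back by the divisor and subtracted
// from the saved numerator.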
3486 
3487 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3488   DCHECK(instruction->IsDiv() || instruction->IsRem());
3489   Primitive::Type type = instruction->GetResultType();
3490   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3491 
3492   bool is_div = instruction->IsDiv();
3493   LocationSummary* locations = instruction->GetLocations();
3494 
3495   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3496   Location second = locations->InAt(1);
3497 
3498   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3499   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3500 
3501   if (second.IsConstant()) {
3502     int64_t imm = Int64FromConstant(second.GetConstant());
3503 
3504     if (imm == 0) {
3505       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3506     } else if (imm == 1 || imm == -1) {
3507       DivRemOneOrMinusOne(instruction);
3508     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3509       DivByPowerOfTwo(instruction->AsDiv());
3510     } else {
3511       DCHECK(imm <= -2 || imm >= 2);
3512       GenerateDivRemWithAnyConstant(instruction);
3513     }
3514   } else {
3515     SlowPathCode* slow_path =
3516         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3517             instruction, out.AsRegister(), type, is_div);
3518     codegen_->AddSlowPath(slow_path);
3519 
3520     CpuRegister second_reg = second.AsRegister<CpuRegister>();
3521     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3522     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3523     // so it's safe to just use negl instead of more complex comparisons.
3524     if (type == Primitive::kPrimInt) {
3525       __ cmpl(second_reg, Immediate(-1));
3526       __ j(kEqual, slow_path->GetEntryLabel());
3527       // edx:eax <- sign-extended of eax
3528       __ cdq();
3529       // eax = quotient, edx = remainder
3530       __ idivl(second_reg);
3531     } else {
3532       __ cmpq(second_reg, Immediate(-1));
3533       __ j(kEqual, slow_path->GetEntryLabel());
3534       // rdx:rax <- sign-extended of rax
3535       __ cqo();
3536       // rax = quotient, rdx = remainder
3537       __ idivq(second_reg);
3538     }
3539     __ Bind(slow_path->GetExitLabel());
3540   }
3541 }
3542 
3543 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3544   LocationSummary* locations =
3545       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3546   switch (div->GetResultType()) {
3547     case Primitive::kPrimInt:
3548     case Primitive::kPrimLong: {
3549       locations->SetInAt(0, Location::RegisterLocation(RAX));
3550       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3551       locations->SetOut(Location::SameAsFirstInput());
3552       // idiv takes its dividend in edx:eax (rdx:rax for longs) and leaves the remainder in edx/rdx.
3553       locations->AddTemp(Location::RegisterLocation(RDX));
3554       // We need to save the numerator while we tweak RAX and RDX. Since the imul used on the
3555       // constant-divisor path forces its results into RAX and RDX, it is simplest to also reserve
3556       // RDX (the temp above) and to request one more temp to hold the numerator.
3557       if (div->InputAt(1)->IsConstant()) {
3558         locations->AddTemp(Location::RequiresRegister());
3559       }
3560       break;
3561     }
3562 
3563     case Primitive::kPrimFloat:
3564     case Primitive::kPrimDouble: {
3565       locations->SetInAt(0, Location::RequiresFpuRegister());
3566       locations->SetInAt(1, Location::Any());
3567       locations->SetOut(Location::SameAsFirstInput());
3568       break;
3569     }
3570 
3571     default:
3572       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3573   }
3574 }
3575 
3576 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3577   LocationSummary* locations = div->GetLocations();
3578   Location first = locations->InAt(0);
3579   Location second = locations->InAt(1);
3580   DCHECK(first.Equals(locations->Out()));
3581 
3582   Primitive::Type type = div->GetResultType();
3583   switch (type) {
3584     case Primitive::kPrimInt:
3585     case Primitive::kPrimLong: {
3586       GenerateDivRemIntegral(div);
3587       break;
3588     }
3589 
3590     case Primitive::kPrimFloat: {
3591       if (second.IsFpuRegister()) {
3592         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3593       } else if (second.IsConstant()) {
3594         __ divss(first.AsFpuRegister<XmmRegister>(),
3595                  codegen_->LiteralFloatAddress(
3596                      second.GetConstant()->AsFloatConstant()->GetValue()));
3597       } else {
3598         DCHECK(second.IsStackSlot());
3599         __ divss(first.AsFpuRegister<XmmRegister>(),
3600                  Address(CpuRegister(RSP), second.GetStackIndex()));
3601       }
3602       break;
3603     }
3604 
3605     case Primitive::kPrimDouble: {
3606       if (second.IsFpuRegister()) {
3607         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3608       } else if (second.IsConstant()) {
3609         __ divsd(first.AsFpuRegister<XmmRegister>(),
3610                  codegen_->LiteralDoubleAddress(
3611                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3612       } else {
3613         DCHECK(second.IsDoubleStackSlot());
3614         __ divsd(first.AsFpuRegister<XmmRegister>(),
3615                  Address(CpuRegister(RSP), second.GetStackIndex()));
3616       }
3617       break;
3618     }
3619 
3620     default:
3621       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3622   }
3623 }
3624 
3625 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3626   Primitive::Type type = rem->GetResultType();
3627   LocationSummary* locations =
3628     new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3629 
3630   switch (type) {
3631     case Primitive::kPrimInt:
3632     case Primitive::kPrimLong: {
3633       locations->SetInAt(0, Location::RegisterLocation(RAX));
3634       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3635       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3636       locations->SetOut(Location::RegisterLocation(RDX));
3637       // We need to save the numerator while we tweak RAX and RDX. Since the imul used on the
3638       // constant-divisor path forces its results into RAX and RDX, it is simplest to keep the
3639       // fixed RAX/RDX assignment and to request one more temp to hold the numerator.
3640       if (rem->InputAt(1)->IsConstant()) {
3641         locations->AddTemp(Location::RequiresRegister());
3642       }
3643       break;
3644     }
3645 
3646     case Primitive::kPrimFloat:
3647     case Primitive::kPrimDouble: {
3648       locations->SetInAt(0, Location::Any());
3649       locations->SetInAt(1, Location::Any());
3650       locations->SetOut(Location::RequiresFpuRegister());
3651       locations->AddTemp(Location::RegisterLocation(RAX));
3652       break;
3653     }
3654 
3655     default:
3656       LOG(FATAL) << "Unexpected rem type " << type;
3657   }
3658 }
3659 
3660 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3661   Primitive::Type type = rem->GetResultType();
3662   switch (type) {
3663     case Primitive::kPrimInt:
3664     case Primitive::kPrimLong: {
3665       GenerateDivRemIntegral(rem);
3666       break;
3667     }
3668     case Primitive::kPrimFloat:
3669     case Primitive::kPrimDouble: {
3670       GenerateRemFP(rem);
3671       break;
3672     }
3673     default:
3674       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3675   }
3676 }
3677 
3678 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3679   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3680       ? LocationSummary::kCallOnSlowPath
3681       : LocationSummary::kNoCall;
3682   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3683   locations->SetInAt(0, Location::Any());
3684   if (instruction->HasUses()) {
3685     locations->SetOut(Location::SameAsFirstInput());
3686   }
3687 }
3688 
3689 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3690   SlowPathCode* slow_path =
3691       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3692   codegen_->AddSlowPath(slow_path);
3693 
3694   LocationSummary* locations = instruction->GetLocations();
3695   Location value = locations->InAt(0);
3696 
3697   switch (instruction->GetType()) {
3698     case Primitive::kPrimBoolean:
3699     case Primitive::kPrimByte:
3700     case Primitive::kPrimChar:
3701     case Primitive::kPrimShort:
3702     case Primitive::kPrimInt: {
3703       if (value.IsRegister()) {
3704         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3705         __ j(kEqual, slow_path->GetEntryLabel());
3706       } else if (value.IsStackSlot()) {
3707         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3708         __ j(kEqual, slow_path->GetEntryLabel());
3709       } else {
3710         DCHECK(value.IsConstant()) << value;
3711         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3712           __ jmp(slow_path->GetEntryLabel());
3713         }
3714       }
3715       break;
3716     }
3717     case Primitive::kPrimLong: {
3718       if (value.IsRegister()) {
3719         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3720         __ j(kEqual, slow_path->GetEntryLabel());
3721       } else if (value.IsDoubleStackSlot()) {
3722         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3723         __ j(kEqual, slow_path->GetEntryLabel());
3724       } else {
3725         DCHECK(value.IsConstant()) << value;
3726         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3727           __ jmp(slow_path->GetEntryLabel());
3728         }
3729       }
3730       break;
3731     }
3732     default:
3733       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3734   }
3735 }
3736 
3737 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3738   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3739 
3740   LocationSummary* locations =
3741       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3742 
3743   switch (op->GetResultType()) {
3744     case Primitive::kPrimInt:
3745     case Primitive::kPrimLong: {
3746       locations->SetInAt(0, Location::RequiresRegister());
3747       // The shift count needs to be in CL.
3748       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3749       locations->SetOut(Location::SameAsFirstInput());
3750       break;
3751     }
3752     default:
3753       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3754   }
3755 }
3756 
3757 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3758   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3759 
3760   LocationSummary* locations = op->GetLocations();
3761   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3762   Location second = locations->InAt(1);
3763 
3764   switch (op->GetResultType()) {
3765     case Primitive::kPrimInt: {
3766       if (second.IsRegister()) {
3767         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3768         if (op->IsShl()) {
3769           __ shll(first_reg, second_reg);
3770         } else if (op->IsShr()) {
3771           __ sarl(first_reg, second_reg);
3772         } else {
3773           __ shrl(first_reg, second_reg);
3774         }
3775       } else {
3776         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3777         if (op->IsShl()) {
3778           __ shll(first_reg, imm);
3779         } else if (op->IsShr()) {
3780           __ sarl(first_reg, imm);
3781         } else {
3782           __ shrl(first_reg, imm);
3783         }
3784       }
3785       break;
3786     }
3787     case Primitive::kPrimLong: {
3788       if (second.IsRegister()) {
3789         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3790         if (op->IsShl()) {
3791           __ shlq(first_reg, second_reg);
3792         } else if (op->IsShr()) {
3793           __ sarq(first_reg, second_reg);
3794         } else {
3795           __ shrq(first_reg, second_reg);
3796         }
3797       } else {
3798         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3799         if (op->IsShl()) {
3800           __ shlq(first_reg, imm);
3801         } else if (op->IsShr()) {
3802           __ sarq(first_reg, imm);
3803         } else {
3804           __ shrq(first_reg, imm);
3805         }
3806       }
3807       break;
3808     }
3809     default:
3810       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3811       UNREACHABLE();
3812   }
3813 }
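
// Note on the masks above: Java uses only the low 5 bits of an int shift distance and the low
// 6 bits of a long shift distance, which matches what shl/sar/shr do in hardware with a count
// in CL. Masking constants with kMaxIntShiftDistance / kMaxLongShiftDistance (0x1f and 0x3f)
// keeps the two in agreement; e.g. for ints, x << 33 behaves exactly like x << 1.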
3814 
3815 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3816   LocationSummary* locations =
3817       new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3818 
3819   switch (ror->GetResultType()) {
3820     case Primitive::kPrimInt:
3821     case Primitive::kPrimLong: {
3822       locations->SetInAt(0, Location::RequiresRegister());
3823       // The shift count needs to be in CL (unless it is a constant).
3824       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3825       locations->SetOut(Location::SameAsFirstInput());
3826       break;
3827     }
3828     default:
3829       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3830       UNREACHABLE();
3831   }
3832 }
3833 
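// Rotate-right lowering mirrors the shifts above: rorl/rorq with the amount in CL, or an
// immediate amount masked to the type's maximum shift distance.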
3834 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3835   LocationSummary* locations = ror->GetLocations();
3836   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3837   Location second = locations->InAt(1);
3838 
3839   switch (ror->GetResultType()) {
3840     case Primitive::kPrimInt:
3841       if (second.IsRegister()) {
3842         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3843         __ rorl(first_reg, second_reg);
3844       } else {
3845         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3846         __ rorl(first_reg, imm);
3847       }
3848       break;
3849     case Primitive::kPrimLong:
3850       if (second.IsRegister()) {
3851         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3852         __ rorq(first_reg, second_reg);
3853       } else {
3854         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3855         __ rorq(first_reg, imm);
3856       }
3857       break;
3858     default:
3859       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3860       UNREACHABLE();
3861   }
3862 }
3863 
3864 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3865   HandleShift(shl);
3866 }
3867 
3868 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3869   HandleShift(shl);
3870 }
3871 
3872 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3873   HandleShift(shr);
3874 }
3875 
3876 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3877   HandleShift(shr);
3878 }
3879 
3880 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3881   HandleShift(ushr);
3882 }
3883 
3884 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3885   HandleShift(ushr);
3886 }
3887 
3888 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3889   LocationSummary* locations =
3890       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3891   InvokeRuntimeCallingConvention calling_convention;
3892   if (instruction->IsStringAlloc()) {
3893     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3894   } else {
3895     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3896     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3897   }
3898   locations->SetOut(Location::RegisterLocation(RAX));
3899 }
3900 
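// String allocation is routed through the StringFactory: the NewEmptyString entrypoint (an
// ArtMethod*) is loaded from the thread-local entrypoint table via the GS segment, and its
// quick-compiled code is called directly. All other allocations invoke the instruction's
// configured runtime entry point.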
3901 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3902   // Note: if heap poisoning is enabled, the entry point takes care
3903   // of poisoning the reference.
3904   if (instruction->IsStringAlloc()) {
3905     // String is allocated through StringFactory. Call NewEmptyString entry point.
3906     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3907     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
3908     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3909     __ call(Address(temp, code_offset.SizeValue()));
3910     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3911   } else {
3912     codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3913                             instruction,
3914                             instruction->GetDexPc(),
3915                             nullptr);
3916     CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3917     DCHECK(!codegen_->IsLeafMethod());
3918   }
3919 }
3920 
3921 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3922   LocationSummary* locations =
3923       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3924   InvokeRuntimeCallingConvention calling_convention;
3925   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3926   locations->SetOut(Location::RegisterLocation(RAX));
3927   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3928   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3929 }
3930 
3931 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3932   InvokeRuntimeCallingConvention calling_convention;
3933   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3934                            instruction->GetTypeIndex());
3935   // Note: if heap poisoning is enabled, the entry point takes care
3936   // of poisoning the reference.
3937   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3938                           instruction,
3939                           instruction->GetDexPc(),
3940                           nullptr);
3941   CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3942 
3943   DCHECK(!codegen_->IsLeafMethod());
3944 }
3945 
3946 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3947   LocationSummary* locations =
3948       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3949   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3950   if (location.IsStackSlot()) {
3951     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3952   } else if (location.IsDoubleStackSlot()) {
3953     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3954   }
3955   locations->SetOut(location);
3956 }
3957 
3958 void InstructionCodeGeneratorX86_64::VisitParameterValue(
3959     HParameterValue* instruction ATTRIBUTE_UNUSED) {
3960   // Nothing to do, the parameter is already at its location.
3961 }
3962 
3963 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3964   LocationSummary* locations =
3965       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3966   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3967 }
3968 
3969 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3970     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3971   // Nothing to do, the method is already at its location.
3972 }
3973 
3974 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3975   LocationSummary* locations =
3976       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3977   locations->SetInAt(0, Location::RequiresRegister());
3978   locations->SetOut(Location::RequiresRegister());
3979 }
3980 
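// Loads a method pointer out of the class given in the first input: either from the embedded
// vtable slot at the given index, or, for interface methods, by first loading imt_ptr_ and
// then the IMT entry at index % ImTable::kSize.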
3981 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3982   LocationSummary* locations = instruction->GetLocations();
3983   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3984     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3985         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3986     __ movq(locations->Out().AsRegister<CpuRegister>(),
3987             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3988   } else {
3989     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3990         instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
3991     __ movq(locations->Out().AsRegister<CpuRegister>(),
3992             Address(locations->InAt(0).AsRegister<CpuRegister>(),
3993             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3994     __ movq(locations->Out().AsRegister<CpuRegister>(),
3995             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
3996   }
3997 }
3998 
3999 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4000   LocationSummary* locations =
4001       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4002   locations->SetInAt(0, Location::RequiresRegister());
4003   locations->SetOut(Location::SameAsFirstInput());
4004 }
4005 
4006 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4007   LocationSummary* locations = not_->GetLocations();
4008   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4009             locations->Out().AsRegister<CpuRegister>().AsRegister());
4010   Location out = locations->Out();
4011   switch (not_->GetResultType()) {
4012     case Primitive::kPrimInt:
4013       __ notl(out.AsRegister<CpuRegister>());
4014       break;
4015 
4016     case Primitive::kPrimLong:
4017       __ notq(out.AsRegister<CpuRegister>());
4018       break;
4019 
4020     default:
4021       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4022   }
4023 }
4024 
4025 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4026   LocationSummary* locations =
4027       new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4028   locations->SetInAt(0, Location::RequiresRegister());
4029   locations->SetOut(Location::SameAsFirstInput());
4030 }
4031 
4032 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4033   LocationSummary* locations = bool_not->GetLocations();
4034   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4035             locations->Out().AsRegister<CpuRegister>().AsRegister());
4036   Location out = locations->Out();
4037   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4038 }
4039 
4040 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4041   LocationSummary* locations =
4042       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4043   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4044     locations->SetInAt(i, Location::Any());
4045   }
4046   locations->SetOut(Location::Any());
4047 }
4048 
4049 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4050   LOG(FATAL) << "Unimplemented";
4051 }
4052 
4053 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4054   /*
4055    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
4056    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4057    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4058    */
4059   switch (kind) {
4060     case MemBarrierKind::kAnyAny: {
4061       MemoryFence();
4062       break;
4063     }
4064     case MemBarrierKind::kAnyStore:
4065     case MemBarrierKind::kLoadAny:
4066     case MemBarrierKind::kStoreStore: {
4067       // nop
4068       break;
4069     }
4070     default:
4071       LOG(FATAL) << "Unexpected memory barrier " << kind;
4072   }
4073 }
4074 
4075 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4076   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4077 
4078   bool object_field_get_with_read_barrier =
4079       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4080   LocationSummary* locations =
4081       new (GetGraph()->GetArena()) LocationSummary(instruction,
4082                                                    object_field_get_with_read_barrier ?
4083                                                        LocationSummary::kCallOnSlowPath :
4084                                                        LocationSummary::kNoCall);
4085   locations->SetInAt(0, Location::RequiresRegister());
4086   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4087     locations->SetOut(Location::RequiresFpuRegister());
4088   } else {
4089     // The output overlaps for an object field get when read barriers
4090     // are enabled: we do not want the move to overwrite the object's
4091     // location, as we need it to emit the read barrier.
4092     locations->SetOut(
4093         Location::RequiresRegister(),
4094         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4095   }
4096   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4097     // We need a temporary register for the read barrier marking slow
4098     // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4099     locations->AddTemp(Location::RequiresRegister());
4100   }
4101 }
4102 
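// Emits the field load with a width- and sign-appropriate move (movzxb/movsxb/movsxw/movzxw/
// movl/movq/movss/movsd). Reference loads may go through the Baker read barrier path; volatile
// loads are followed by a LoadAny barrier, which is a no-op fence on x86-64 but still acts as
// a scheduling barrier.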
4103 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4104                                                     const FieldInfo& field_info) {
4105   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4106 
4107   LocationSummary* locations = instruction->GetLocations();
4108   Location base_loc = locations->InAt(0);
4109   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4110   Location out = locations->Out();
4111   bool is_volatile = field_info.IsVolatile();
4112   Primitive::Type field_type = field_info.GetFieldType();
4113   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4114 
4115   switch (field_type) {
4116     case Primitive::kPrimBoolean: {
4117       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4118       break;
4119     }
4120 
4121     case Primitive::kPrimByte: {
4122       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4123       break;
4124     }
4125 
4126     case Primitive::kPrimShort: {
4127       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4128       break;
4129     }
4130 
4131     case Primitive::kPrimChar: {
4132       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4133       break;
4134     }
4135 
4136     case Primitive::kPrimInt: {
4137       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4138       break;
4139     }
4140 
4141     case Primitive::kPrimNot: {
4142       // /* HeapReference<Object> */ out = *(base + offset)
4143       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4144         Location temp_loc = locations->GetTemp(0);
4145         // Note that a potential implicit null check is handled in this
4146         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4147         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4148             instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4149         if (is_volatile) {
4150           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4151         }
4152       } else {
4153         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4154         codegen_->MaybeRecordImplicitNullCheck(instruction);
4155         if (is_volatile) {
4156           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4157         }
4158         // If read barriers are enabled, emit read barriers other than
4159         // Baker's using a slow path (and also unpoison the loaded
4160         // reference, if heap poisoning is enabled).
4161         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4162       }
4163       break;
4164     }
4165 
4166     case Primitive::kPrimLong: {
4167       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4168       break;
4169     }
4170 
4171     case Primitive::kPrimFloat: {
4172       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4173       break;
4174     }
4175 
4176     case Primitive::kPrimDouble: {
4177       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4178       break;
4179     }
4180 
4181     case Primitive::kPrimVoid:
4182       LOG(FATAL) << "Unreachable type " << field_type;
4183       UNREACHABLE();
4184   }
4185 
4186   if (field_type == Primitive::kPrimNot) {
4187     // Potential implicit null checks, in the case of reference
4188     // fields, are handled in the previous switch statement.
4189   } else {
4190     codegen_->MaybeRecordImplicitNullCheck(instruction);
4191   }
4192 
4193   if (is_volatile) {
4194     if (field_type == Primitive::kPrimNot) {
4195       // Memory barriers, in the case of references, are also handled
4196       // in the previous switch statement.
4197     } else {
4198       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4199     }
4200   }
4201 }
4202 
4203 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4204                                             const FieldInfo& field_info) {
4205   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4206 
4207   LocationSummary* locations =
4208       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4209   Primitive::Type field_type = field_info.GetFieldType();
4210   bool is_volatile = field_info.IsVolatile();
4211   bool needs_write_barrier =
4212       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4213 
4214   locations->SetInAt(0, Location::RequiresRegister());
4215   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4216     if (is_volatile) {
4217       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4218       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4219     } else {
4220       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4221     }
4222   } else {
4223     if (is_volatile) {
4224       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4225       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4226     } else {
4227       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4228     }
4229   }
4230   if (needs_write_barrier) {
4231     // Temporary registers for the write barrier.
4232     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4233     locations->AddTemp(Location::RequiresRegister());
4234   } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4235     // Temporary register for the reference poisoning.
4236     locations->AddTemp(Location::RequiresRegister());
4237   }
4238 }
4239 
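// Emits the field store. Volatile stores are bracketed by AnyStore/AnyAny barriers; reference
// stores are poisoned first when heap poisoning is enabled, and the GC card for the holder is
// dirtied via MarkGCCard when a write barrier is needed.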
4240 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4241                                                     const FieldInfo& field_info,
4242                                                     bool value_can_be_null) {
4243   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4244 
4245   LocationSummary* locations = instruction->GetLocations();
4246   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4247   Location value = locations->InAt(1);
4248   bool is_volatile = field_info.IsVolatile();
4249   Primitive::Type field_type = field_info.GetFieldType();
4250   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4251 
4252   if (is_volatile) {
4253     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4254   }
4255 
4256   bool maybe_record_implicit_null_check_done = false;
4257 
4258   switch (field_type) {
4259     case Primitive::kPrimBoolean:
4260     case Primitive::kPrimByte: {
4261       if (value.IsConstant()) {
4262         int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4263         __ movb(Address(base, offset), Immediate(v));
4264       } else {
4265         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4266       }
4267       break;
4268     }
4269 
4270     case Primitive::kPrimShort:
4271     case Primitive::kPrimChar: {
4272       if (value.IsConstant()) {
4273         int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4274         __ movw(Address(base, offset), Immediate(v));
4275       } else {
4276         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4277       }
4278       break;
4279     }
4280 
4281     case Primitive::kPrimInt:
4282     case Primitive::kPrimNot: {
4283       if (value.IsConstant()) {
4284         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4285         // `field_type == Primitive::kPrimNot` implies `v == 0`.
4286         DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4287         // Note: if heap poisoning is enabled, no need to poison
4288         // (negate) `v` if it is a reference, as it would be null.
4289         __ movl(Address(base, offset), Immediate(v));
4290       } else {
4291         if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4292           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4293           __ movl(temp, value.AsRegister<CpuRegister>());
4294           __ PoisonHeapReference(temp);
4295           __ movl(Address(base, offset), temp);
4296         } else {
4297           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4298         }
4299       }
4300       break;
4301     }
4302 
4303     case Primitive::kPrimLong: {
4304       if (value.IsConstant()) {
4305         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4306         codegen_->MoveInt64ToAddress(Address(base, offset),
4307                                      Address(base, offset + sizeof(int32_t)),
4308                                      v,
4309                                      instruction);
4310         maybe_record_implicit_null_check_done = true;
4311       } else {
4312         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4313       }
4314       break;
4315     }
4316 
4317     case Primitive::kPrimFloat: {
4318       if (value.IsConstant()) {
4319         int32_t v =
4320             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4321         __ movl(Address(base, offset), Immediate(v));
4322       } else {
4323         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4324       }
4325       break;
4326     }
4327 
4328     case Primitive::kPrimDouble: {
4329       if (value.IsConstant()) {
4330         int64_t v =
4331             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4332         codegen_->MoveInt64ToAddress(Address(base, offset),
4333                                      Address(base, offset + sizeof(int32_t)),
4334                                      v,
4335                                      instruction);
4336         maybe_record_implicit_null_check_done = true;
4337       } else {
4338         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4339       }
4340       break;
4341     }
4342 
4343     case Primitive::kPrimVoid:
4344       LOG(FATAL) << "Unreachable type " << field_type;
4345       UNREACHABLE();
4346   }
4347 
4348   if (!maybe_record_implicit_null_check_done) {
4349     codegen_->MaybeRecordImplicitNullCheck(instruction);
4350   }
4351 
4352   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4353     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4354     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4355     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4356   }
4357 
4358   if (is_volatile) {
4359     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4360   }
4361 }
4362 
4363 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4364   HandleFieldSet(instruction, instruction->GetFieldInfo());
4365 }
4366 
4367 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4368   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4369 }
4370 
4371 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4372   HandleFieldGet(instruction);
4373 }
4374 
4375 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4376   HandleFieldGet(instruction, instruction->GetFieldInfo());
4377 }
4378 
4379 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4380   HandleFieldGet(instruction);
4381 }
4382 
4383 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4384   HandleFieldGet(instruction, instruction->GetFieldInfo());
4385 }
4386 
4387 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4388   HandleFieldSet(instruction, instruction->GetFieldInfo());
4389 }
4390 
4391 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4392   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4393 }
4394 
4395 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4396     HUnresolvedInstanceFieldGet* instruction) {
4397   FieldAccessCallingConventionX86_64 calling_convention;
4398   codegen_->CreateUnresolvedFieldLocationSummary(
4399       instruction, instruction->GetFieldType(), calling_convention);
4400 }
4401 
4402 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4403     HUnresolvedInstanceFieldGet* instruction) {
4404   FieldAccessCallingConventionX86_64 calling_convention;
4405   codegen_->GenerateUnresolvedFieldAccess(instruction,
4406                                           instruction->GetFieldType(),
4407                                           instruction->GetFieldIndex(),
4408                                           instruction->GetDexPc(),
4409                                           calling_convention);
4410 }
4411 
4412 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4413     HUnresolvedInstanceFieldSet* instruction) {
4414   FieldAccessCallingConventionX86_64 calling_convention;
4415   codegen_->CreateUnresolvedFieldLocationSummary(
4416       instruction, instruction->GetFieldType(), calling_convention);
4417 }
4418 
4419 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4420     HUnresolvedInstanceFieldSet* instruction) {
4421   FieldAccessCallingConventionX86_64 calling_convention;
4422   codegen_->GenerateUnresolvedFieldAccess(instruction,
4423                                           instruction->GetFieldType(),
4424                                           instruction->GetFieldIndex(),
4425                                           instruction->GetDexPc(),
4426                                           calling_convention);
4427 }
4428 
4429 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4430     HUnresolvedStaticFieldGet* instruction) {
4431   FieldAccessCallingConventionX86_64 calling_convention;
4432   codegen_->CreateUnresolvedFieldLocationSummary(
4433       instruction, instruction->GetFieldType(), calling_convention);
4434 }
4435 
4436 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4437     HUnresolvedStaticFieldGet* instruction) {
4438   FieldAccessCallingConventionX86_64 calling_convention;
4439   codegen_->GenerateUnresolvedFieldAccess(instruction,
4440                                           instruction->GetFieldType(),
4441                                           instruction->GetFieldIndex(),
4442                                           instruction->GetDexPc(),
4443                                           calling_convention);
4444 }
4445 
4446 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4447     HUnresolvedStaticFieldSet* instruction) {
4448   FieldAccessCallingConventionX86_64 calling_convention;
4449   codegen_->CreateUnresolvedFieldLocationSummary(
4450       instruction, instruction->GetFieldType(), calling_convention);
4451 }
4452 
4453 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4454     HUnresolvedStaticFieldSet* instruction) {
4455   FieldAccessCallingConventionX86_64 calling_convention;
4456   codegen_->GenerateUnresolvedFieldAccess(instruction,
4457                                           instruction->GetFieldType(),
4458                                           instruction->GetFieldIndex(),
4459                                           instruction->GetDexPc(),
4460                                           calling_convention);
4461 }
4462 
4463 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4464   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4465       ? LocationSummary::kCallOnSlowPath
4466       : LocationSummary::kNoCall;
4467   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4468   Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4469       ? Location::RequiresRegister()
4470       : Location::Any();
4471   locations->SetInAt(0, loc);
4472   if (instruction->HasUses()) {
4473     locations->SetOut(Location::SameAsFirstInput());
4474   }
4475 }
4476 
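// An implicit null check is a load from the object at offset 0: testl only reads memory and
// sets flags, so a null receiver faults, and the runtime's fault handler turns the signal into
// a NullPointerException at the PC recorded below.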
4477 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4478   if (CanMoveNullCheckToUser(instruction)) {
4479     return;
4480   }
4481   LocationSummary* locations = instruction->GetLocations();
4482   Location obj = locations->InAt(0);
4483 
4484   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4485   RecordPcInfo(instruction, instruction->GetDexPc());
4486 }
4487 
4488 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4489   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4490   AddSlowPath(slow_path);
4491 
4492   LocationSummary* locations = instruction->GetLocations();
4493   Location obj = locations->InAt(0);
4494 
4495   if (obj.IsRegister()) {
4496     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4497   } else if (obj.IsStackSlot()) {
4498     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4499   } else {
4500     DCHECK(obj.IsConstant()) << obj;
4501     DCHECK(obj.GetConstant()->IsNullConstant());
4502     __ jmp(slow_path->GetEntryLabel());
4503     return;
4504   }
4505   __ j(kEqual, slow_path->GetEntryLabel());
4506 }
4507 
4508 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4509   codegen_->GenerateNullCheck(instruction);
4510 }
4511 
4512 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4513   bool object_array_get_with_read_barrier =
4514       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4515   LocationSummary* locations =
4516       new (GetGraph()->GetArena()) LocationSummary(instruction,
4517                                                    object_array_get_with_read_barrier ?
4518                                                        LocationSummary::kCallOnSlowPath :
4519                                                        LocationSummary::kNoCall);
4520   locations->SetInAt(0, Location::RequiresRegister());
4521   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4522   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4523     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4524   } else {
4525     // The output overlaps for an object array get when read barriers
4526     // are enabled: we do not want the move to overwrite the array's
4527     // location, as we need it to emit the read barrier.
4528     locations->SetOut(
4529         Location::RequiresRegister(),
4530         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4531   }
4532   // We need a temporary register for the read barrier marking slow
4533   // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4534   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4535     locations->AddTemp(Location::RequiresRegister());
4536   }
4537 }
4538 
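// Array loads mirror field loads: the element address is obj + data_offset + index * element
// size, folded into a single displacement when the index is constant. Reference elements may
// take the Baker read barrier path and otherwise fall back to the slow-path read barrier.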
4539 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4540   LocationSummary* locations = instruction->GetLocations();
4541   Location obj_loc = locations->InAt(0);
4542   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4543   Location index = locations->InAt(1);
4544   Location out_loc = locations->Out();
4545 
4546   Primitive::Type type = instruction->GetType();
4547   switch (type) {
4548     case Primitive::kPrimBoolean: {
4549       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4550       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4551       if (index.IsConstant()) {
4552         __ movzxb(out, Address(obj,
4553             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4554       } else {
4555         __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4556       }
4557       break;
4558     }
4559 
4560     case Primitive::kPrimByte: {
4561       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4562       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4563       if (index.IsConstant()) {
4564         __ movsxb(out, Address(obj,
4565             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4566       } else {
4567         __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4568       }
4569       break;
4570     }
4571 
4572     case Primitive::kPrimShort: {
4573       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4574       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4575       if (index.IsConstant()) {
4576         __ movsxw(out, Address(obj,
4577             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4578       } else {
4579         __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4580       }
4581       break;
4582     }
4583 
4584     case Primitive::kPrimChar: {
4585       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4586       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4587       if (index.IsConstant()) {
4588         __ movzxw(out, Address(obj,
4589             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4590       } else {
4591         __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4592       }
4593       break;
4594     }
4595 
4596     case Primitive::kPrimInt: {
4597       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4598       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4599       if (index.IsConstant()) {
4600         __ movl(out, Address(obj,
4601             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4602       } else {
4603         __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4604       }
4605       break;
4606     }
4607 
4608     case Primitive::kPrimNot: {
4609       static_assert(
4610           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4611           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4612       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4613       // /* HeapReference<Object> */ out =
4614       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4615       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4616         Location temp = locations->GetTemp(0);
4617         // Note that a potential implicit null check is handled in this
4618         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4619         codegen_->GenerateArrayLoadWithBakerReadBarrier(
4620             instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4621       } else {
4622         CpuRegister out = out_loc.AsRegister<CpuRegister>();
4623         if (index.IsConstant()) {
4624           uint32_t offset =
4625               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4626           __ movl(out, Address(obj, offset));
4627           codegen_->MaybeRecordImplicitNullCheck(instruction);
4628           // If read barriers are enabled, emit read barriers other than
4629           // Baker's using a slow path (and also unpoison the loaded
4630           // reference, if heap poisoning is enabled).
4631           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4632         } else {
4633           __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4634           codegen_->MaybeRecordImplicitNullCheck(instruction);
4635           // If read barriers are enabled, emit read barriers other than
4636           // Baker's using a slow path (and also unpoison the loaded
4637           // reference, if heap poisoning is enabled).
4638           codegen_->MaybeGenerateReadBarrierSlow(
4639               instruction, out_loc, out_loc, obj_loc, data_offset, index);
4640         }
4641       }
4642       break;
4643     }
4644 
4645     case Primitive::kPrimLong: {
4646       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4647       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4648       if (index.IsConstant()) {
4649         __ movq(out, Address(obj,
4650             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4651       } else {
4652         __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4653       }
4654       break;
4655     }
4656 
4657     case Primitive::kPrimFloat: {
4658       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4659       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4660       if (index.IsConstant()) {
4661         __ movss(out, Address(obj,
4662             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4663       } else {
4664         __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4665       }
4666       break;
4667     }
4668 
4669     case Primitive::kPrimDouble: {
4670       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4671       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4672       if (index.IsConstant()) {
4673         __ movsd(out, Address(obj,
4674             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4675       } else {
4676         __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4677       }
4678       break;
4679     }
4680 
4681     case Primitive::kPrimVoid:
4682       LOG(FATAL) << "Unreachable type " << type;
4683       UNREACHABLE();
4684   }
4685 
4686   if (type == Primitive::kPrimNot) {
4687     // Potential implicit null checks, in the case of reference
4688     // arrays, are handled in the previous switch statement.
4689   } else {
4690     codegen_->MaybeRecordImplicitNullCheck(instruction);
4691   }
4692 }
4693 
4694 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4695   Primitive::Type value_type = instruction->GetComponentType();
4696 
4697   bool needs_write_barrier =
4698       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4699   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4700   bool object_array_set_with_read_barrier =
4701       kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4702 
4703   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4704       instruction,
4705       (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4706           LocationSummary::kCallOnSlowPath :
4707           LocationSummary::kNoCall);
4708 
4709   locations->SetInAt(0, Location::RequiresRegister());
4710   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4711   if (Primitive::IsFloatingPointType(value_type)) {
4712     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4713   } else {
4714     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4715   }
4716 
4717   if (needs_write_barrier) {
4718     // Temporary registers for the write barrier.
4719 
4720     // This first temporary register is possibly used for heap
4721     // reference poisoning and/or read barrier emission too.
4722     locations->AddTemp(Location::RequiresRegister());
4723     locations->AddTemp(Location::RequiresRegister());
4724   }
4725 }
4726 
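// For reference stores, unless the type check was statically elided, the value's class is
// compared against the array's component type (and its super class for Object[] arrays), and
// mismatches branch to the ArraySet slow path; with read barriers enabled the check is
// delegated to the slow path entirely, as explained in the comment below.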
4727 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4728   LocationSummary* locations = instruction->GetLocations();
4729   Location array_loc = locations->InAt(0);
4730   CpuRegister array = array_loc.AsRegister<CpuRegister>();
4731   Location index = locations->InAt(1);
4732   Location value = locations->InAt(2);
4733   Primitive::Type value_type = instruction->GetComponentType();
4734   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4735   bool needs_write_barrier =
4736       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4737   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4738   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4739   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4740 
4741   switch (value_type) {
4742     case Primitive::kPrimBoolean:
4743     case Primitive::kPrimByte: {
4744       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4745       Address address = index.IsConstant()
4746           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4747           : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4748       if (value.IsRegister()) {
4749         __ movb(address, value.AsRegister<CpuRegister>());
4750       } else {
4751         __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4752       }
4753       codegen_->MaybeRecordImplicitNullCheck(instruction);
4754       break;
4755     }
4756 
4757     case Primitive::kPrimShort:
4758     case Primitive::kPrimChar: {
4759       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4760       Address address = index.IsConstant()
4761           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4762           : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4763       if (value.IsRegister()) {
4764         __ movw(address, value.AsRegister<CpuRegister>());
4765       } else {
4766         DCHECK(value.IsConstant()) << value;
4767         __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4768       }
4769       codegen_->MaybeRecordImplicitNullCheck(instruction);
4770       break;
4771     }
4772 
4773     case Primitive::kPrimNot: {
4774       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4775       Address address = index.IsConstant()
4776           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4777           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4778 
4779       if (!value.IsRegister()) {
4780         // Just setting null.
4781         DCHECK(instruction->InputAt(2)->IsNullConstant());
4782         DCHECK(value.IsConstant()) << value;
4783         __ movl(address, Immediate(0));
4784         codegen_->MaybeRecordImplicitNullCheck(instruction);
4785         DCHECK(!needs_write_barrier);
4786         DCHECK(!may_need_runtime_call_for_type_check);
4787         break;
4788       }
4789 
4790       DCHECK(needs_write_barrier);
4791       CpuRegister register_value = value.AsRegister<CpuRegister>();
4792       NearLabel done, not_null, do_put;
4793       SlowPathCode* slow_path = nullptr;
4794       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4795       if (may_need_runtime_call_for_type_check) {
4796         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4797         codegen_->AddSlowPath(slow_path);
4798         if (instruction->GetValueCanBeNull()) {
4799           __ testl(register_value, register_value);
4800           __ j(kNotEqual, &not_null);
4801           __ movl(address, Immediate(0));
4802           codegen_->MaybeRecordImplicitNullCheck(instruction);
4803           __ jmp(&done);
4804           __ Bind(&not_null);
4805         }
4806 
4807         if (kEmitCompilerReadBarrier) {
4808           // When read barriers are enabled, the type checking
4809           // instrumentation requires two read barriers:
4810           //
4811           //   __ movl(temp2, temp);
4812           //   // /* HeapReference<Class> */ temp = temp->component_type_
4813           //   __ movl(temp, Address(temp, component_offset));
4814           //   codegen_->GenerateReadBarrierSlow(
4815           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4816           //
4817           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
4818           //   __ movl(temp2, Address(register_value, class_offset));
4819           //   codegen_->GenerateReadBarrierSlow(
4820           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4821           //
4822           //   __ cmpl(temp, temp2);
4823           //
4824           // However, the second read barrier may trash `temp`, as it
4825           // is a temporary register, and as such would not be saved
4826           // along with live registers before calling the runtime (nor
4827           // restored afterwards).  So in this case, we bail out and
4828           // delegate the work to the array set slow path.
4829           //
4830           // TODO: Extend the register allocator to support a new
4831           // "(locally) live temp" location so as to avoid always
4832           // going into the slow path when read barriers are enabled.
4833           __ jmp(slow_path->GetEntryLabel());
4834         } else {
4835           // /* HeapReference<Class> */ temp = array->klass_
4836           __ movl(temp, Address(array, class_offset));
4837           codegen_->MaybeRecordImplicitNullCheck(instruction);
4838           __ MaybeUnpoisonHeapReference(temp);
4839 
4840           // /* HeapReference<Class> */ temp = temp->component_type_
4841           __ movl(temp, Address(temp, component_offset));
4842           // If heap poisoning is enabled, no need to unpoison `temp`
4843           // nor the object reference in `register_value->klass_`, as
4844           // we are comparing two poisoned references.
4845           __ cmpl(temp, Address(register_value, class_offset));
4846 
4847           if (instruction->StaticTypeOfArrayIsObjectArray()) {
4848             __ j(kEqual, &do_put);
4849             // If heap poisoning is enabled, the `temp` reference has
4850             // not been unpoisoned yet; unpoison it now.
4851             __ MaybeUnpoisonHeapReference(temp);
4852 
4853             // /* HeapReference<Class> */ temp = temp->super_class_
4854             __ movl(temp, Address(temp, super_offset));
4855             // If heap poisoning is enabled, no need to unpoison
4856             // `temp`, as we are comparing against null below.
4857             __ testl(temp, temp);
4858             __ j(kNotEqual, slow_path->GetEntryLabel());
4859             __ Bind(&do_put);
4860           } else {
4861             __ j(kNotEqual, slow_path->GetEntryLabel());
4862           }
4863         }
4864       }
4865 
4866       if (kPoisonHeapReferences) {
4867         __ movl(temp, register_value);
4868         __ PoisonHeapReference(temp);
4869         __ movl(address, temp);
4870       } else {
4871         __ movl(address, register_value);
4872       }
4873       if (!may_need_runtime_call_for_type_check) {
4874         codegen_->MaybeRecordImplicitNullCheck(instruction);
4875       }
4876 
4877       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4878       codegen_->MarkGCCard(
4879           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4880       __ Bind(&done);
4881 
4882       if (slow_path != nullptr) {
4883         __ Bind(slow_path->GetExitLabel());
4884       }
4885 
4886       break;
4887     }
4888 
4889     case Primitive::kPrimInt: {
4890       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4891       Address address = index.IsConstant()
4892           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4893           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4894       if (value.IsRegister()) {
4895         __ movl(address, value.AsRegister<CpuRegister>());
4896       } else {
4897         DCHECK(value.IsConstant()) << value;
4898         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4899         __ movl(address, Immediate(v));
4900       }
4901       codegen_->MaybeRecordImplicitNullCheck(instruction);
4902       break;
4903     }
4904 
4905     case Primitive::kPrimLong: {
4906       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4907       Address address = index.IsConstant()
4908           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4909           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4910       if (value.IsRegister()) {
4911         __ movq(address, value.AsRegister<CpuRegister>());
4912         codegen_->MaybeRecordImplicitNullCheck(instruction);
4913       } else {
4914         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4915         Address address_high = index.IsConstant()
4916             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4917                 offset + sizeof(int32_t))
4918             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4919         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4920       }
4921       break;
4922     }
4923 
4924     case Primitive::kPrimFloat: {
4925       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4926       Address address = index.IsConstant()
4927           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4928           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4929       if (value.IsFpuRegister()) {
4930         __ movss(address, value.AsFpuRegister<XmmRegister>());
4931       } else {
4932         DCHECK(value.IsConstant());
4933         int32_t v =
4934             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4935         __ movl(address, Immediate(v));
4936       }
4937       codegen_->MaybeRecordImplicitNullCheck(instruction);
4938       break;
4939     }
4940 
4941     case Primitive::kPrimDouble: {
4942       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4943       Address address = index.IsConstant()
4944           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4945           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4946       if (value.IsFpuRegister()) {
4947         __ movsd(address, value.AsFpuRegister<XmmRegister>());
4948         codegen_->MaybeRecordImplicitNullCheck(instruction);
4949       } else {
4950         int64_t v =
4951             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4952         Address address_high = index.IsConstant()
4953             ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4954                 offset + sizeof(int32_t))
4955             : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4956         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4957       }
4958       break;
4959     }
4960 
4961     case Primitive::kPrimVoid:
4962       LOG(FATAL) << "Unreachable type " << instruction->GetType();
4963       UNREACHABLE();
4964   }
4965 }
4966 
4967 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4968   LocationSummary* locations =
4969       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4970   locations->SetInAt(0, Location::RequiresRegister());
4971   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4972 }
4973 
4974 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4975   LocationSummary* locations = instruction->GetLocations();
4976   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4977   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4978   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4979   __ movl(out, Address(obj, offset));
4980   codegen_->MaybeRecordImplicitNullCheck(instruction);
4981 }
4982 
4983 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4984   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4985       ? LocationSummary::kCallOnSlowPath
4986       : LocationSummary::kNoCall;
4987   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4988   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4989   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4990   if (instruction->HasUses()) {
4991     locations->SetOut(Location::SameAsFirstInput());
4992   }
4993 }
4994 
4995 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4996   LocationSummary* locations = instruction->GetLocations();
4997   Location index_loc = locations->InAt(0);
4998   Location length_loc = locations->InAt(1);
4999   SlowPathCode* slow_path =
5000       new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
5001 
5002   if (length_loc.IsConstant()) {
5003     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5004     if (index_loc.IsConstant()) {
5005       // BCE will remove the bounds check if we are guaranteed to pass.
5006       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5007       if (index < 0 || index >= length) {
5008         codegen_->AddSlowPath(slow_path);
5009         __ jmp(slow_path->GetEntryLabel());
5010       } else {
5011         // Some optimization after BCE may have generated this constant index and
5012         // length pair; the access is known to be in range, so no bounds check is needed.
5013       }
5014       return;
5015     }
5016 
5017     // We have to reverse the jump condition because the length is the constant.
5018     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5019     __ cmpl(index_reg, Immediate(length));
5020     codegen_->AddSlowPath(slow_path);
5021     __ j(kAboveEqual, slow_path->GetEntryLabel());
5022   } else {
5023     CpuRegister length = length_loc.AsRegister<CpuRegister>();
5024     if (index_loc.IsConstant()) {
5025       int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5026       __ cmpl(length, Immediate(value));
5027     } else {
5028       __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5029     }
5030     codegen_->AddSlowPath(slow_path);
5031     __ j(kBelowEqual, slow_path->GetEntryLabel());
5032   }
5033 }
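// An illustrative expansion of the cases above (register names arbitrary). With
// the length folded to the constant 10 and the index in ESI:
//     cmpl $10, %esi
//     jae  <BoundsCheckSlowPath>      ; unsigned >= also traps negative indices
// With both values in registers the operands are not reversed:
//     cmpl %esi, %edx                 ; compare length (EDX) with index (ESI)
//     jbe  <BoundsCheckSlowPath>      ; length <= index (unsigned) is out of range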
5034 
5035 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5036                                      CpuRegister card,
5037                                      CpuRegister object,
5038                                      CpuRegister value,
5039                                      bool value_can_be_null) {
5040   NearLabel is_null;
5041   if (value_can_be_null) {
5042     __ testl(value, value);
5043     __ j(kEqual, &is_null);
5044   }
5045   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5046                                         /* no_rip */ true));
5047   __ movq(temp, object);
5048   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5049   __ movb(Address(temp, card, TIMES_1, 0), card);
5050   if (value_can_be_null) {
5051     __ Bind(&is_null);
5052   }
5053 }
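// A minimal sketch of the sequence MarkGCCard emits, assuming the stored value
// is in ESI and the object in RDI (both arbitrary); `temp` and `card` are the
// scratch registers passed in:
//     testl %esi, %esi                      ; skip when the stored value is null
//     je    .Lis_null
//     movq  %gs:<card_table_offset>, card   ; thread-local card table base
//     movq  %rdi, temp
//     shrq  $kCardShift, temp               ; card index = object address >> shift
//     movb  card_low8, (temp, card, 1)      ; dirty the card; the base appears to be
//                                           ;   biased so its low byte is the dirty
//   .Lis_null:                              ;   value, avoiding a separate constant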
5054 
5055 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5056   LOG(FATAL) << "Unimplemented";
5057 }
5058 
5059 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5060   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5061 }
5062 
5063 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5064   new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5065 }
5066 
5067 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5068   HBasicBlock* block = instruction->GetBlock();
5069   if (block->GetLoopInformation() != nullptr) {
5070     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5071     // The back edge will generate the suspend check.
5072     return;
5073   }
5074   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5075     // The goto will generate the suspend check.
5076     return;
5077   }
5078   GenerateSuspendCheck(instruction, nullptr);
5079 }
5080 
5081 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5082                                                           HBasicBlock* successor) {
5083   SuspendCheckSlowPathX86_64* slow_path =
5084       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5085   if (slow_path == nullptr) {
5086     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5087     instruction->SetSlowPath(slow_path);
5088     codegen_->AddSlowPath(slow_path);
5089     if (successor != nullptr) {
5090       DCHECK(successor->IsLoopHeader());
5091       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5092     }
5093   } else {
5094     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5095   }
5096 
5097   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5098                                   /* no_rip */ true),
5099                 Immediate(0));
5100   if (successor == nullptr) {
5101     __ j(kNotEqual, slow_path->GetEntryLabel());
5102     __ Bind(slow_path->GetReturnLabel());
5103   } else {
5104     __ j(kEqual, codegen_->GetLabelOf(successor));
5105     __ jmp(slow_path->GetEntryLabel());
5106   }
5107 }
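// Roughly what the check above expands to (labels illustrative). At a plain
// suspend point (successor == nullptr):
//     cmpw  $0, %gs:<thread_flags_offset>   ; any checkpoint/suspend request pending?
//     jne   <SuspendCheckSlowPath>
//   .Lreturn:                               ; the slow path returns here
// On a loop back edge (successor != nullptr) the test is inverted: jump straight
// to the loop header when no flag is set, otherwise fall through into the slow path.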
5108 
5109 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5110   return codegen_->GetAssembler();
5111 }
5112 
5113 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5114   MoveOperands* move = moves_[index];
5115   Location source = move->GetSource();
5116   Location destination = move->GetDestination();
5117 
5118   if (source.IsRegister()) {
5119     if (destination.IsRegister()) {
5120       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5121     } else if (destination.IsStackSlot()) {
5122       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5123               source.AsRegister<CpuRegister>());
5124     } else {
5125       DCHECK(destination.IsDoubleStackSlot());
5126       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5127               source.AsRegister<CpuRegister>());
5128     }
5129   } else if (source.IsStackSlot()) {
5130     if (destination.IsRegister()) {
5131       __ movl(destination.AsRegister<CpuRegister>(),
5132               Address(CpuRegister(RSP), source.GetStackIndex()));
5133     } else if (destination.IsFpuRegister()) {
5134       __ movss(destination.AsFpuRegister<XmmRegister>(),
5135               Address(CpuRegister(RSP), source.GetStackIndex()));
5136     } else {
5137       DCHECK(destination.IsStackSlot());
5138       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5139       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5140     }
5141   } else if (source.IsDoubleStackSlot()) {
5142     if (destination.IsRegister()) {
5143       __ movq(destination.AsRegister<CpuRegister>(),
5144               Address(CpuRegister(RSP), source.GetStackIndex()));
5145     } else if (destination.IsFpuRegister()) {
5146       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5147                Address(CpuRegister(RSP), source.GetStackIndex()));
5148     } else {
5149       DCHECK(destination.IsDoubleStackSlot()) << destination;
5150       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5151       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5152     }
5153   } else if (source.IsConstant()) {
5154     HConstant* constant = source.GetConstant();
5155     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5156       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5157       if (destination.IsRegister()) {
5158         if (value == 0) {
5159           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5160         } else {
5161           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5162         }
5163       } else {
5164         DCHECK(destination.IsStackSlot()) << destination;
5165         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5166       }
5167     } else if (constant->IsLongConstant()) {
5168       int64_t value = constant->AsLongConstant()->GetValue();
5169       if (destination.IsRegister()) {
5170         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5171       } else {
5172         DCHECK(destination.IsDoubleStackSlot()) << destination;
5173         codegen_->Store64BitValueToStack(destination, value);
5174       }
5175     } else if (constant->IsFloatConstant()) {
5176       float fp_value = constant->AsFloatConstant()->GetValue();
5177       if (destination.IsFpuRegister()) {
5178         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5179         codegen_->Load32BitValue(dest, fp_value);
5180       } else {
5181         DCHECK(destination.IsStackSlot()) << destination;
5182         Immediate imm(bit_cast<int32_t, float>(fp_value));
5183         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5184       }
5185     } else {
5186       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5187       double fp_value =  constant->AsDoubleConstant()->GetValue();
5188       int64_t value = bit_cast<int64_t, double>(fp_value);
5189       if (destination.IsFpuRegister()) {
5190         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5191         codegen_->Load64BitValue(dest, fp_value);
5192       } else {
5193         DCHECK(destination.IsDoubleStackSlot()) << destination;
5194         codegen_->Store64BitValueToStack(destination, value);
5195       }
5196     }
5197   } else if (source.IsFpuRegister()) {
5198     if (destination.IsFpuRegister()) {
5199       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5200     } else if (destination.IsStackSlot()) {
5201       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5202                source.AsFpuRegister<XmmRegister>());
5203     } else {
5204       DCHECK(destination.IsDoubleStackSlot()) << destination;
5205       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5206                source.AsFpuRegister<XmmRegister>());
5207     }
5208   }
5209 }
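// EmitMove never needs an extra allocated register: every memory-to-memory case
// above goes through the fixed TMP register. A sketch of a 32-bit stack-to-stack
// move (the offsets are made up for illustration):
//     movl 16(%rsp), TMP
//     movl TMP, 24(%rsp)
// Note also that an integer or null constant of zero is materialized with
// `xorl reg, reg` rather than `movl $0, reg` for a shorter encoding.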
5210 
5211 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5212   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5213   __ movl(Address(CpuRegister(RSP), mem), reg);
5214   __ movl(reg, CpuRegister(TMP));
5215 }
5216 
5217 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5218   ScratchRegisterScope ensure_scratch(
5219       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5220 
5221   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5222   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5223   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5224           Address(CpuRegister(RSP), mem2 + stack_offset));
5225   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5226   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5227           CpuRegister(ensure_scratch.GetRegister()));
5228 }
5229 
5230 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5231   __ movq(CpuRegister(TMP), reg1);
5232   __ movq(reg1, reg2);
5233   __ movq(reg2, CpuRegister(TMP));
5234 }
5235 
5236 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5237   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5238   __ movq(Address(CpuRegister(RSP), mem), reg);
5239   __ movq(reg, CpuRegister(TMP));
5240 }
5241 
5242 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5243   ScratchRegisterScope ensure_scratch(
5244       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5245 
5246   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5247   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5248   __ movq(CpuRegister(ensure_scratch.GetRegister()),
5249           Address(CpuRegister(RSP), mem2 + stack_offset));
5250   __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5251   __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5252           CpuRegister(ensure_scratch.GetRegister()));
5253 }
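// The two memory/memory exchanges above need a second scratch register besides
// TMP. The ScratchRegisterScope is constructed here with TMP and RAX; when it has
// to spill a register (IsSpilled()), the pushq in SpillScratch below moves RSP,
// which is why every slot offset is then biased by kX86_64WordSize. Rough shape
// of the spilled 64-bit exchange (offsets illustrative):
//     pushq %rax
//     movq  8+mem1(%rsp), TMP
//     movq  8+mem2(%rsp), %rax
//     movq  TMP, 8+mem2(%rsp)
//     movq  %rax, 8+mem1(%rsp)
//     popq  %rax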
5254 
5255 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5256   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5257   __ movss(Address(CpuRegister(RSP), mem), reg);
5258   __ movd(reg, CpuRegister(TMP));
5259 }
5260 
5261 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5262   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5263   __ movsd(Address(CpuRegister(RSP), mem), reg);
5264   __ movd(reg, CpuRegister(TMP));
5265 }
5266 
5267 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5268   MoveOperands* move = moves_[index];
5269   Location source = move->GetSource();
5270   Location destination = move->GetDestination();
5271 
5272   if (source.IsRegister() && destination.IsRegister()) {
5273     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5274   } else if (source.IsRegister() && destination.IsStackSlot()) {
5275     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5276   } else if (source.IsStackSlot() && destination.IsRegister()) {
5277     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5278   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5279     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5280   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5281     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5282   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5283     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5284   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5285     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5286   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5287     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5288     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5289     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5290   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5291     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5292   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5293     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5294   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5295     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5296   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5297     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5298   } else {
5299     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5300   }
5301 }
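// None of the swaps above use xchg; everything is routed through TMP (or a
// scratch register from ScratchRegisterScope) with plain moves. For example, the
// XMM<->XMM case becomes, with arbitrary register names:
//     movd   %xmm3, TMP     ; stash the source value in the integer scratch
//     movaps %xmm7, %xmm3   ; source <- destination
//     movd   TMP, %xmm7     ; destination <- stashed source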
5302 
5303 
5304 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5305   __ pushq(CpuRegister(reg));
5306 }
5307 
5308 
5309 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5310   __ popq(CpuRegister(reg));
5311 }
5312 
5313 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5314     SlowPathCode* slow_path, CpuRegister class_reg) {
5315   __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
5316           Immediate(mirror::Class::kStatusInitialized));
5317   __ j(kLess, slow_path->GetEntryLabel());
5318   __ Bind(slow_path->GetExitLabel());
5319   // No need for memory fence, thanks to the x86-64 memory model.
5320 }
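// Illustrative expansion of the initialization check above:
//     cmpl $kStatusInitialized, <status_offset>(class_reg)
//     jl   <LoadClassSlowPath>   ; signed: any status below kStatusInitialized
//   .Lexit:                      ;   takes the slow path
// Because x86-64 does not reorder loads with other loads, no acquire fence is
// required after observing the initialized status (unlike e.g. ARM).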
5321 
5322 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5323   InvokeRuntimeCallingConvention calling_convention;
5324   CodeGenerator::CreateLoadClassLocationSummary(
5325       cls,
5326       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5327       Location::RegisterLocation(RAX),
5328       /* code_generator_supports_read_barrier */ true);
5329 }
5330 
5331 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5332   LocationSummary* locations = cls->GetLocations();
5333   if (cls->NeedsAccessCheck()) {
5334     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5335     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5336                             cls,
5337                             cls->GetDexPc(),
5338                             nullptr);
5339     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5340     return;
5341   }
5342 
5343   Location out_loc = locations->Out();
5344   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5345   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5346 
5347   if (cls->IsReferrersClass()) {
5348     DCHECK(!cls->CanCallRuntime());
5349     DCHECK(!cls->MustGenerateClinitCheck());
5350     // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5351     GenerateGcRootFieldLoad(
5352         cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5353   } else {
5354     // /* GcRoot<mirror::Class>[] */ out =
5355     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
5356     __ movq(out, Address(current_method,
5357                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5358     // /* GcRoot<mirror::Class> */ out = out[type_index]
5359     GenerateGcRootFieldLoad(
5360         cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5361 
5362     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5363       DCHECK(cls->CanCallRuntime());
5364       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5365           cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5366       codegen_->AddSlowPath(slow_path);
5367       if (!cls->IsInDexCache()) {
5368         __ testl(out, out);
5369         __ j(kEqual, slow_path->GetEntryLabel());
5370       }
5371       if (cls->MustGenerateClinitCheck()) {
5372         GenerateClassInitializationCheck(slow_path, out);
5373       } else {
5374         __ Bind(slow_path->GetExitLabel());
5375       }
5376     }
5377   }
5378 }
5379 
5380 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5381   LocationSummary* locations =
5382       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5383   locations->SetInAt(0, Location::RequiresRegister());
5384   if (check->HasUses()) {
5385     locations->SetOut(Location::SameAsFirstInput());
5386   }
5387 }
5388 
5389 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5390   // We assume the class is not null.
5391   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5392       check->GetLoadClass(), check, check->GetDexPc(), true);
5393   codegen_->AddSlowPath(slow_path);
5394   GenerateClassInitializationCheck(slow_path,
5395                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5396 }
5397 
5398 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5399     HLoadString::LoadKind desired_string_load_kind) {
5400   if (kEmitCompilerReadBarrier) {
5401     switch (desired_string_load_kind) {
5402       case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5403       case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5404       case HLoadString::LoadKind::kBootImageAddress:
5405         // TODO: Implement for read barrier.
5406         return HLoadString::LoadKind::kDexCacheViaMethod;
5407       default:
5408         break;
5409     }
5410   }
5411   switch (desired_string_load_kind) {
5412     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5413       DCHECK(!GetCompilerOptions().GetCompilePic());
5414       // We prefer the always-available RIP-relative address for the x86-64 boot image.
5415       return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5416     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5417       DCHECK(GetCompilerOptions().GetCompilePic());
5418       break;
5419     case HLoadString::LoadKind::kBootImageAddress:
5420       break;
5421     case HLoadString::LoadKind::kDexCacheAddress:
5422       DCHECK(Runtime::Current()->UseJitCompilation());
5423       break;
5424     case HLoadString::LoadKind::kDexCachePcRelative:
5425       DCHECK(!Runtime::Current()->UseJitCompilation());
5426       break;
5427     case HLoadString::LoadKind::kDexCacheViaMethod:
5428       break;
5429   }
5430   return desired_string_load_kind;
5431 }
5432 
5433 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5434   LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5435       ? LocationSummary::kCallOnSlowPath
5436       : LocationSummary::kNoCall;
5437   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5438   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5439     locations->SetInAt(0, Location::RequiresRegister());
5440   }
5441   locations->SetOut(Location::RequiresRegister());
5442 }
5443 
5444 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5445   LocationSummary* locations = load->GetLocations();
5446   Location out_loc = locations->Out();
5447   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5448 
5449   switch (load->GetLoadKind()) {
5450     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5451       DCHECK(!kEmitCompilerReadBarrier);
5452       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5453       codegen_->RecordStringPatch(load);
5454       return;  // No dex cache slow path.
5455     }
5456     case HLoadString::LoadKind::kBootImageAddress: {
5457       DCHECK(!kEmitCompilerReadBarrier);
5458       DCHECK_NE(load->GetAddress(), 0u);
5459       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5460       __ movl(out, Immediate(address));  // Zero-extended.
5461       codegen_->RecordSimplePatch();
5462       return;  // No dex cache slow path.
5463     }
5464     case HLoadString::LoadKind::kDexCacheAddress: {
5465       DCHECK_NE(load->GetAddress(), 0u);
5466       if (IsUint<32>(load->GetAddress())) {
5467         Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5468         GenerateGcRootFieldLoad(load, out_loc, address);
5469       } else {
5470         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5471         __ movq(out, Immediate(load->GetAddress()));
5472         GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5473       }
5474       break;
5475     }
5476     case HLoadString::LoadKind::kDexCachePcRelative: {
5477       uint32_t offset = load->GetDexCacheElementOffset();
5478       Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5479       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5480                                           /* no_rip */ false);
5481       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5482       break;
5483     }
5484     case HLoadString::LoadKind::kDexCacheViaMethod: {
5485       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5486 
5487       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5488       GenerateGcRootFieldLoad(
5489           load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5490       // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5491       __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5492       // /* GcRoot<mirror::String> */ out = out[string_index]
5493       GenerateGcRootFieldLoad(
5494           load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5495       break;
5496     }
5497     default:
5498       LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5499       UNREACHABLE();
5500   }
5501 
5502   if (!load->IsInDexCache()) {
5503     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5504     codegen_->AddSlowPath(slow_path);
5505     __ testl(out, out);
5506     __ j(kEqual, slow_path->GetEntryLabel());
5507     __ Bind(slow_path->GetExitLabel());
5508   }
5509 }
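// For the kBootImageLinkTimePcRelative case above, the generated code is just a
// RIP-relative lea whose 32-bit displacement starts out as kDummy32BitOffset and
// is rewritten later via the patch recorded by RecordStringPatch; a sketch:
//     leal  <placeholder>(%rip), out      ; displacement patched at link time
// The kinds that may miss the dex cache fall through to the testl/je null check
// at the end of the visitor, which enters LoadStringSlowPath when the loaded
// root is null.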
5510 
5511 static Address GetExceptionTlsAddress() {
5512   return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5513                            /* no_rip */ true);
5514 }
5515 
5516 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5517   LocationSummary* locations =
5518       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5519   locations->SetOut(Location::RequiresRegister());
5520 }
5521 
5522 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5523   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5524 }
5525 
5526 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5527   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5528 }
5529 
5530 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5531   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5532 }
5533 
5534 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5535   LocationSummary* locations =
5536       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5537   InvokeRuntimeCallingConvention calling_convention;
5538   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5539 }
5540 
5541 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5542   codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5543                           instruction,
5544                           instruction->GetDexPc(),
5545                           nullptr);
5546   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5547 }
5548 
5549 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5550   return kEmitCompilerReadBarrier &&
5551       (kUseBakerReadBarrier ||
5552        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5553        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5554        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5555 }
5556 
5557 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5558   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5559   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5560   switch (type_check_kind) {
5561     case TypeCheckKind::kExactCheck:
5562     case TypeCheckKind::kAbstractClassCheck:
5563     case TypeCheckKind::kClassHierarchyCheck:
5564     case TypeCheckKind::kArrayObjectCheck:
5565       call_kind =
5566           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5567       break;
5568     case TypeCheckKind::kArrayCheck:
5569     case TypeCheckKind::kUnresolvedCheck:
5570     case TypeCheckKind::kInterfaceCheck:
5571       call_kind = LocationSummary::kCallOnSlowPath;
5572       break;
5573   }
5574 
5575   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5576   locations->SetInAt(0, Location::RequiresRegister());
5577   locations->SetInAt(1, Location::Any());
5578   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5579   locations->SetOut(Location::RequiresRegister());
5580   // When read barriers are enabled, we need a temporary register for
5581   // some cases.
5582   if (TypeCheckNeedsATemporary(type_check_kind)) {
5583     locations->AddTemp(Location::RequiresRegister());
5584   }
5585 }
5586 
5587 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5588   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5589   LocationSummary* locations = instruction->GetLocations();
5590   Location obj_loc = locations->InAt(0);
5591   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5592   Location cls = locations->InAt(1);
5593   Location out_loc =  locations->Out();
5594   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5595   Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5596       locations->GetTemp(0) :
5597       Location::NoLocation();
5598   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5599   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5600   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5601   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5602   SlowPathCode* slow_path = nullptr;
5603   NearLabel done, zero;
5604 
5605   // Return 0 if `obj` is null.
5606   // Avoid null check if we know obj is not null.
5607   if (instruction->MustDoNullCheck()) {
5608     __ testl(obj, obj);
5609     __ j(kEqual, &zero);
5610   }
5611 
5612   // /* HeapReference<Class> */ out = obj->klass_
5613   GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5614 
5615   switch (type_check_kind) {
5616     case TypeCheckKind::kExactCheck: {
5617       if (cls.IsRegister()) {
5618         __ cmpl(out, cls.AsRegister<CpuRegister>());
5619       } else {
5620         DCHECK(cls.IsStackSlot()) << cls;
5621         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5622       }
5623       if (zero.IsLinked()) {
5624         // Classes must be equal for the instanceof to succeed.
5625         __ j(kNotEqual, &zero);
5626         __ movl(out, Immediate(1));
5627         __ jmp(&done);
5628       } else {
5629         __ setcc(kEqual, out);
5630         // setcc only sets the low byte.
5631         __ andl(out, Immediate(1));
5632       }
5633       break;
5634     }
5635 
5636     case TypeCheckKind::kAbstractClassCheck: {
5637       // If the class is abstract, we eagerly fetch the super class of the
5638       // object to avoid doing a comparison we know will fail.
5639       NearLabel loop, success;
5640       __ Bind(&loop);
5641       // /* HeapReference<Class> */ out = out->super_class_
5642       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5643       __ testl(out, out);
5644       // If `out` is null, we use it for the result, and jump to `done`.
5645       __ j(kEqual, &done);
5646       if (cls.IsRegister()) {
5647         __ cmpl(out, cls.AsRegister<CpuRegister>());
5648       } else {
5649         DCHECK(cls.IsStackSlot()) << cls;
5650         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5651       }
5652       __ j(kNotEqual, &loop);
5653       __ movl(out, Immediate(1));
5654       if (zero.IsLinked()) {
5655         __ jmp(&done);
5656       }
5657       break;
5658     }
5659 
5660     case TypeCheckKind::kClassHierarchyCheck: {
5661       // Walk over the class hierarchy to find a match.
5662       NearLabel loop, success;
5663       __ Bind(&loop);
5664       if (cls.IsRegister()) {
5665         __ cmpl(out, cls.AsRegister<CpuRegister>());
5666       } else {
5667         DCHECK(cls.IsStackSlot()) << cls;
5668         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5669       }
5670       __ j(kEqual, &success);
5671       // /* HeapReference<Class> */ out = out->super_class_
5672       GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5673       __ testl(out, out);
5674       __ j(kNotEqual, &loop);
5675       // If `out` is null, we use it for the result, and jump to `done`.
5676       __ jmp(&done);
5677       __ Bind(&success);
5678       __ movl(out, Immediate(1));
5679       if (zero.IsLinked()) {
5680         __ jmp(&done);
5681       }
5682       break;
5683     }
5684 
5685     case TypeCheckKind::kArrayObjectCheck: {
5686       // Do an exact check.
5687       NearLabel exact_check;
5688       if (cls.IsRegister()) {
5689         __ cmpl(out, cls.AsRegister<CpuRegister>());
5690       } else {
5691         DCHECK(cls.IsStackSlot()) << cls;
5692         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5693       }
5694       __ j(kEqual, &exact_check);
5695       // Otherwise, we need to check that the object's class is a non-primitive array.
5696       // /* HeapReference<Class> */ out = out->component_type_
5697       GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5698       __ testl(out, out);
5699       // If `out` is null, we use it for the result, and jump to `done`.
5700       __ j(kEqual, &done);
5701       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5702       __ j(kNotEqual, &zero);
5703       __ Bind(&exact_check);
5704       __ movl(out, Immediate(1));
5705       __ jmp(&done);
5706       break;
5707     }
5708 
5709     case TypeCheckKind::kArrayCheck: {
5710       if (cls.IsRegister()) {
5711         __ cmpl(out, cls.AsRegister<CpuRegister>());
5712       } else {
5713         DCHECK(cls.IsStackSlot()) << cls;
5714         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5715       }
5716       DCHECK(locations->OnlyCallsOnSlowPath());
5717       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5718                                                                        /* is_fatal */ false);
5719       codegen_->AddSlowPath(slow_path);
5720       __ j(kNotEqual, slow_path->GetEntryLabel());
5721       __ movl(out, Immediate(1));
5722       if (zero.IsLinked()) {
5723         __ jmp(&done);
5724       }
5725       break;
5726     }
5727 
5728     case TypeCheckKind::kUnresolvedCheck:
5729     case TypeCheckKind::kInterfaceCheck: {
5730       // Note that we indeed only call on slow path, but we always go
5731       // into the slow path for the unresolved and interface check
5732       // cases.
5733       //
5734       // We cannot directly call the InstanceofNonTrivial runtime
5735       // entry point without resorting to a type checking slow path
5736       // here (i.e. by calling InvokeRuntime directly), as it would
5737       // require to assign fixed registers for the inputs of this
5738       // HInstanceOf instruction (following the runtime calling
5739       // convention), which might be cluttered by the potential first
5740       // read barrier emission at the beginning of this method.
5741       //
5742       // TODO: Introduce a new runtime entry point taking the object
5743       // to test (instead of its class) as argument, and let it deal
5744       // with the read barrier issues. This will let us refactor this
5745       // case of the `switch` code as it was previously (with a direct
5746       // call to the runtime not using a type checking slow path).
5747       // This should also be beneficial for the other cases above.
5748       DCHECK(locations->OnlyCallsOnSlowPath());
5749       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5750                                                                        /* is_fatal */ false);
5751       codegen_->AddSlowPath(slow_path);
5752       __ jmp(slow_path->GetEntryLabel());
5753       if (zero.IsLinked()) {
5754         __ jmp(&done);
5755       }
5756       break;
5757     }
5758   }
5759 
5760   if (zero.IsLinked()) {
5761     __ Bind(&zero);
5762     __ xorl(out, out);
5763   }
5764 
5765   if (done.IsLinked()) {
5766     __ Bind(&done);
5767   }
5768 
5769   if (slow_path != nullptr) {
5770     __ Bind(slow_path->GetExitLabel());
5771   }
5772 }
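// When the null check is statically elided (the `zero` label stays unlinked),
// the kExactCheck case above compiles to a small branchless sequence (register
// names illustrative):
//     cmpl  cls, out
//     sete  out_low8         ; setcc only writes the low byte...
//     andl  $1, out          ; ...so the result is masked down to 0/1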
5773 
5774 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5775   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5776   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5777   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5778   switch (type_check_kind) {
5779     case TypeCheckKind::kExactCheck:
5780     case TypeCheckKind::kAbstractClassCheck:
5781     case TypeCheckKind::kClassHierarchyCheck:
5782     case TypeCheckKind::kArrayObjectCheck:
5783       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5784           LocationSummary::kCallOnSlowPath :
5785           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
5786       break;
5787     case TypeCheckKind::kArrayCheck:
5788     case TypeCheckKind::kUnresolvedCheck:
5789     case TypeCheckKind::kInterfaceCheck:
5790       call_kind = LocationSummary::kCallOnSlowPath;
5791       break;
5792   }
5793   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5794   locations->SetInAt(0, Location::RequiresRegister());
5795   locations->SetInAt(1, Location::Any());
5796   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5797   locations->AddTemp(Location::RequiresRegister());
5798   // When read barriers are enabled, we need an additional temporary
5799   // register for some cases.
5800   if (TypeCheckNeedsATemporary(type_check_kind)) {
5801     locations->AddTemp(Location::RequiresRegister());
5802   }
5803 }
5804 
5805 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5806   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5807   LocationSummary* locations = instruction->GetLocations();
5808   Location obj_loc = locations->InAt(0);
5809   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5810   Location cls = locations->InAt(1);
5811   Location temp_loc = locations->GetTemp(0);
5812   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5813   Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5814       locations->GetTemp(1) :
5815       Location::NoLocation();
5816   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5817   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5818   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5819   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5820 
5821   bool is_type_check_slow_path_fatal =
5822       (type_check_kind == TypeCheckKind::kExactCheck ||
5823        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5824        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5825        type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5826       !instruction->CanThrowIntoCatchBlock();
5827   SlowPathCode* type_check_slow_path =
5828       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5829                                                            is_type_check_slow_path_fatal);
5830   codegen_->AddSlowPath(type_check_slow_path);
5831 
5832   switch (type_check_kind) {
5833     case TypeCheckKind::kExactCheck:
5834     case TypeCheckKind::kArrayCheck: {
5835       NearLabel done;
5836       // Avoid null check if we know obj is not null.
5837       if (instruction->MustDoNullCheck()) {
5838         __ testl(obj, obj);
5839         __ j(kEqual, &done);
5840       }
5841 
5842       // /* HeapReference<Class> */ temp = obj->klass_
5843       GenerateReferenceLoadTwoRegisters(
5844           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5845 
5846       if (cls.IsRegister()) {
5847         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5848       } else {
5849         DCHECK(cls.IsStackSlot()) << cls;
5850         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5851       }
5852       // Jump to slow path for throwing the exception or doing a
5853       // more involved array check.
5854       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5855       __ Bind(&done);
5856       break;
5857     }
5858 
5859     case TypeCheckKind::kAbstractClassCheck: {
5860       NearLabel done;
5861       // Avoid null check if we know obj is not null.
5862       if (instruction->MustDoNullCheck()) {
5863         __ testl(obj, obj);
5864         __ j(kEqual, &done);
5865       }
5866 
5867       // /* HeapReference<Class> */ temp = obj->klass_
5868       GenerateReferenceLoadTwoRegisters(
5869           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5870 
5871       // If the class is abstract, we eagerly fetch the super class of the
5872       // object to avoid doing a comparison we know will fail.
5873       NearLabel loop, compare_classes;
5874       __ Bind(&loop);
5875       // /* HeapReference<Class> */ temp = temp->super_class_
5876       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5877 
5878       // If the class reference currently in `temp` is not null, jump
5879       // to the `compare_classes` label to compare it with the checked
5880       // class.
5881       __ testl(temp, temp);
5882       __ j(kNotEqual, &compare_classes);
5883       // Otherwise, jump to the slow path to throw the exception.
5884       //
5885       // But before, move back the object's class into `temp` before
5886       // going into the slow path, as it has been overwritten in the
5887       // meantime.
5888       // /* HeapReference<Class> */ temp = obj->klass_
5889       GenerateReferenceLoadTwoRegisters(
5890           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5891       __ jmp(type_check_slow_path->GetEntryLabel());
5892 
5893       __ Bind(&compare_classes);
5894       if (cls.IsRegister()) {
5895         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5896       } else {
5897         DCHECK(cls.IsStackSlot()) << cls;
5898         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5899       }
5900       __ j(kNotEqual, &loop);
5901       __ Bind(&done);
5902       break;
5903     }
5904 
5905     case TypeCheckKind::kClassHierarchyCheck: {
5906       NearLabel done;
5907       // Avoid null check if we know obj is not null.
5908       if (instruction->MustDoNullCheck()) {
5909         __ testl(obj, obj);
5910         __ j(kEqual, &done);
5911       }
5912 
5913       // /* HeapReference<Class> */ temp = obj->klass_
5914       GenerateReferenceLoadTwoRegisters(
5915           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5916 
5917       // Walk over the class hierarchy to find a match.
5918       NearLabel loop;
5919       __ Bind(&loop);
5920       if (cls.IsRegister()) {
5921         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5922       } else {
5923         DCHECK(cls.IsStackSlot()) << cls;
5924         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5925       }
5926       __ j(kEqual, &done);
5927 
5928       // /* HeapReference<Class> */ temp = temp->super_class_
5929       GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5930 
5931       // If the class reference currently in `temp` is not null, jump
5932       // back to the beginning of the loop.
5933       __ testl(temp, temp);
5934       __ j(kNotEqual, &loop);
5935       // Otherwise, jump to the slow path to throw the exception.
5936       //
5937       // But before, move back the object's class into `temp` before
5938       // going into the slow path, as it has been overwritten in the
5939       // meantime.
5940       // /* HeapReference<Class> */ temp = obj->klass_
5941       GenerateReferenceLoadTwoRegisters(
5942           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5943       __ jmp(type_check_slow_path->GetEntryLabel());
5944       __ Bind(&done);
5945       break;
5946     }
5947 
5948     case TypeCheckKind::kArrayObjectCheck: {
5949       // We cannot use a NearLabel here, as its range might be too
5950       // short in some cases when read barriers are enabled.  This has
5951       // been observed for instance when the code emitted for this
5952       // case uses high x86-64 registers (R8-R15).
5953       Label done;
5954       // Avoid null check if we know obj is not null.
5955       if (instruction->MustDoNullCheck()) {
5956         __ testl(obj, obj);
5957         __ j(kEqual, &done);
5958       }
5959 
5960       // /* HeapReference<Class> */ temp = obj->klass_
5961       GenerateReferenceLoadTwoRegisters(
5962           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5963 
5964       // Do an exact check.
5965       NearLabel check_non_primitive_component_type;
5966       if (cls.IsRegister()) {
5967         __ cmpl(temp, cls.AsRegister<CpuRegister>());
5968       } else {
5969         DCHECK(cls.IsStackSlot()) << cls;
5970         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5971       }
5972       __ j(kEqual, &done);
5973 
5974       // Otherwise, we need to check that the object's class is a non-primitive array.
5975       // /* HeapReference<Class> */ temp = temp->component_type_
5976       GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5977 
5978       // If the component type is not null (i.e. the object is indeed
5979       // an array), jump to label `check_non_primitive_component_type`
5980       // to further check that this component type is not a primitive
5981       // type.
5982       __ testl(temp, temp);
5983       __ j(kNotEqual, &check_non_primitive_component_type);
5984       // Otherwise, jump to the slow path to throw the exception.
5985       //
5986       // But before, move back the object's class into `temp` before
5987       // going into the slow path, as it has been overwritten in the
5988       // meantime.
5989       // /* HeapReference<Class> */ temp = obj->klass_
5990       GenerateReferenceLoadTwoRegisters(
5991           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5992       __ jmp(type_check_slow_path->GetEntryLabel());
5993 
5994       __ Bind(&check_non_primitive_component_type);
5995       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5996       __ j(kEqual, &done);
5997       // Same comment as above regarding `temp` and the slow path.
5998       // /* HeapReference<Class> */ temp = obj->klass_
5999       GenerateReferenceLoadTwoRegisters(
6000           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6001       __ jmp(type_check_slow_path->GetEntryLabel());
6002       __ Bind(&done);
6003       break;
6004     }
6005 
6006     case TypeCheckKind::kUnresolvedCheck:
6007     case TypeCheckKind::kInterfaceCheck:
6008       NearLabel done;
6009       // Avoid null check if we know obj is not null.
6010       if (instruction->MustDoNullCheck()) {
6011         __ testl(obj, obj);
6012         __ j(kEqual, &done);
6013       }
6014 
6015       // /* HeapReference<Class> */ temp = obj->klass_
6016       GenerateReferenceLoadTwoRegisters(
6017           instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6018 
6019       // We always go into the type check slow path for the unresolved
6020       // and interface check cases.
6021       //
6022       // We cannot directly call the CheckCast runtime entry point
6023       // without resorting to a type checking slow path here (i.e. by
6024       // calling InvokeRuntime directly), as it would require to
6025       // assign fixed registers for the inputs of this HInstanceOf
6026       // instruction (following the runtime calling convention), which
6027       // might be cluttered by the potential first read barrier
6028       // emission at the beginning of this method.
6029       //
6030       // TODO: Introduce a new runtime entry point taking the object
6031       // to test (instead of its class) as argument, and let it deal
6032       // with the read barrier issues. This will let us refactor this
6033       // case of the `switch` code as it was previously (with a direct
6034       // call to the runtime not using a type checking slow path).
6035       // This should also be beneficial for the other cases above.
6036       __ jmp(type_check_slow_path->GetEntryLabel());
6037       __ Bind(&done);
6038       break;
6039   }
6040 
6041   __ Bind(type_check_slow_path->GetExitLabel());
6042 }
6043 
6044 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6045   LocationSummary* locations =
6046       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6047   InvokeRuntimeCallingConvention calling_convention;
6048   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6049 }
6050 
6051 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6052   codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6053                                                  : QUICK_ENTRY_POINT(pUnlockObject),
6054                           instruction,
6055                           instruction->GetDexPc(),
6056                           nullptr);
6057   if (instruction->IsEnter()) {
6058     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6059   } else {
6060     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6061   }
6062 }
6063 
6064 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6065 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6066 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6067 
6068 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6069   LocationSummary* locations =
6070       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6071   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6072          || instruction->GetResultType() == Primitive::kPrimLong);
6073   locations->SetInAt(0, Location::RequiresRegister());
6074   locations->SetInAt(1, Location::Any());
6075   locations->SetOut(Location::SameAsFirstInput());
6076 }
6077 
6078 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6079   HandleBitwiseOperation(instruction);
6080 }
6081 
6082 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6083   HandleBitwiseOperation(instruction);
6084 }
6085 
6086 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6087   HandleBitwiseOperation(instruction);
6088 }
6089 
6090 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6091   LocationSummary* locations = instruction->GetLocations();
6092   Location first = locations->InAt(0);
6093   Location second = locations->InAt(1);
6094   DCHECK(first.Equals(locations->Out()));
6095 
6096   if (instruction->GetResultType() == Primitive::kPrimInt) {
6097     if (second.IsRegister()) {
6098       if (instruction->IsAnd()) {
6099         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6100       } else if (instruction->IsOr()) {
6101         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6102       } else {
6103         DCHECK(instruction->IsXor());
6104         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6105       }
6106     } else if (second.IsConstant()) {
6107       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6108       if (instruction->IsAnd()) {
6109         __ andl(first.AsRegister<CpuRegister>(), imm);
6110       } else if (instruction->IsOr()) {
6111         __ orl(first.AsRegister<CpuRegister>(), imm);
6112       } else {
6113         DCHECK(instruction->IsXor());
6114         __ xorl(first.AsRegister<CpuRegister>(), imm);
6115       }
6116     } else {
6117       Address address(CpuRegister(RSP), second.GetStackIndex());
6118       if (instruction->IsAnd()) {
6119         __ andl(first.AsRegister<CpuRegister>(), address);
6120       } else if (instruction->IsOr()) {
6121         __ orl(first.AsRegister<CpuRegister>(), address);
6122       } else {
6123         DCHECK(instruction->IsXor());
6124         __ xorl(first.AsRegister<CpuRegister>(), address);
6125       }
6126     }
6127   } else {
6128     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6129     CpuRegister first_reg = first.AsRegister<CpuRegister>();
6130     bool second_is_constant = false;
6131     int64_t value = 0;
6132     if (second.IsConstant()) {
6133       second_is_constant = true;
6134       value = second.GetConstant()->AsLongConstant()->GetValue();
6135     }
6136     bool is_int32_value = IsInt<32>(value);
6137 
6138     if (instruction->IsAnd()) {
6139       if (second_is_constant) {
6140         if (is_int32_value) {
6141           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6142         } else {
6143           __ andq(first_reg, codegen_->LiteralInt64Address(value));
6144         }
6145       } else if (second.IsDoubleStackSlot()) {
6146         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6147       } else {
6148         __ andq(first_reg, second.AsRegister<CpuRegister>());
6149       }
6150     } else if (instruction->IsOr()) {
6151       if (second_is_constant) {
6152         if (is_int32_value) {
6153           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6154         } else {
6155           __ orq(first_reg, codegen_->LiteralInt64Address(value));
6156         }
6157       } else if (second.IsDoubleStackSlot()) {
6158         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6159       } else {
6160         __ orq(first_reg, second.AsRegister<CpuRegister>());
6161       }
6162     } else {
6163       DCHECK(instruction->IsXor());
6164       if (second_is_constant) {
6165         if (is_int32_value) {
6166           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6167         } else {
6168           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6169         }
6170       } else if (second.IsDoubleStackSlot()) {
6171         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6172       } else {
6173         __ xorq(first_reg, second.AsRegister<CpuRegister>());
6174       }
6175     }
6176   }
6177 }
6178 
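// Illustrative sketch of the 64-bit forms selected above for HAnd, with
// hypothetical registers and values (not verbatim compiler output):
//
//   second in a register:           andq %rsi, %rdi
//   second is an int32 constant:    andq $0x7fffffff, %rdi
//   second needs all 64 bits:       andq 0x123(%rip), %rdi   // literal in the constant area
//   second in a double stack slot:  andq 0x10(%rsp), %rdi
//
// HOr and HXor follow the same pattern with orq/xorq.
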
6179 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6180                                                                       Location out,
6181                                                                       uint32_t offset,
6182                                                                       Location maybe_temp) {
6183   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6184   if (kEmitCompilerReadBarrier) {
6185     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6186     if (kUseBakerReadBarrier) {
6187       // Load with fast path based Baker's read barrier.
6188       // /* HeapReference<Object> */ out = *(out + offset)
6189       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6190           instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6191     } else {
6192       // Load with slow path based read barrier.
6193       // Save the value of `out` into `maybe_temp` before overwriting it
6194       // in the following move operation, as we will need it for the
6195       // read barrier below.
6196       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6197       // /* HeapReference<Object> */ out = *(out + offset)
6198       __ movl(out_reg, Address(out_reg, offset));
6199       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6200     }
6201   } else {
6202     // Plain load with no read barrier.
6203     // /* HeapReference<Object> */ out = *(out + offset)
6204     __ movl(out_reg, Address(out_reg, offset));
6205     __ MaybeUnpoisonHeapReference(out_reg);
6206   }
6207 }
6208 
6209 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6210                                                                        Location out,
6211                                                                        Location obj,
6212                                                                        uint32_t offset,
6213                                                                        Location maybe_temp) {
6214   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6215   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6216   if (kEmitCompilerReadBarrier) {
6217     if (kUseBakerReadBarrier) {
6218       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6219       // Load with fast path based Baker's read barrier.
6220       // /* HeapReference<Object> */ out = *(obj + offset)
6221       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6222           instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6223     } else {
6224       // Load with slow path based read barrier.
6225       // /* HeapReference<Object> */ out = *(obj + offset)
6226       __ movl(out_reg, Address(obj_reg, offset));
6227       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6228     }
6229   } else {
6230     // Plain load with no read barrier.
6231     // /* HeapReference<Object> */ out = *(obj + offset)
6232     __ movl(out_reg, Address(obj_reg, offset));
6233     __ MaybeUnpoisonHeapReference(out_reg);
6234   }
6235 }
6236 
6237 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6238                                                              Location root,
6239                                                              const Address& address,
6240                                                              Label* fixup_label) {
6241   CpuRegister root_reg = root.AsRegister<CpuRegister>();
6242   if (kEmitCompilerReadBarrier) {
6243     if (kUseBakerReadBarrier) {
6244       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6245       // Baker's read barriers are used:
6246       //
6247       //   root = *address;
6248       //   if (Thread::Current()->GetIsGcMarking()) {
6249       //     root = ReadBarrier::Mark(root)
6250       //   }
6251 
6252       // /* GcRoot<mirror::Object> */ root = *address
6253       __ movl(root_reg, address);
6254       if (fixup_label != nullptr) {
6255         __ Bind(fixup_label);
6256       }
6257       static_assert(
6258           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6259           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6260           "have different sizes.");
6261       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6262                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
6263                     "have different sizes.");
6264 
6265       // Slow path used to mark the GC root `root`.
6266       SlowPathCode* slow_path =
6267           new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6268       codegen_->AddSlowPath(slow_path);
6269 
6270       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6271                                       /* no_rip */ true),
6272                     Immediate(0));
6273       __ j(kNotEqual, slow_path->GetEntryLabel());
6274       __ Bind(slow_path->GetExitLabel());
6275     } else {
6276       // GC root loaded through a slow path for read barriers other
6277       // than Baker's.
6278       // /* GcRoot<mirror::Object>* */ root = address
6279       __ leaq(root_reg, address);
6280       if (fixup_label != nullptr) {
6281         __ Bind(fixup_label);
6282       }
6283       // /* mirror::Object* */ root = root->Read()
6284       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6285     }
6286   } else {
6287     // Plain GC root load with no read barrier.
6288     // /* GcRoot<mirror::Object> */ root = *address
6289     __ movl(root_reg, address);
6290     if (fixup_label != nullptr) {
6291       __ Bind(fixup_label);
6292     }
6293     // Note that GC roots are not affected by heap poisoning, thus we
6294     // do not have to unpoison `root_reg` here.
6295   }
6296 }
6297 
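// Roughly, the Baker fast path emitted above for a GC root load looks like
// the following (registers and offsets are placeholders, not actual output):
//
//   movl <address>, %root                          // root = *address
//   gs: cmpl $0, <Thread::is_gc_marking offset>    // is the GC marking?
//   jne  <ReadBarrierMark slow path>               // if so, root = Mark(root)
//   <slow path exit label>
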
6298 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6299                                                                 Location ref,
6300                                                                 CpuRegister obj,
6301                                                                 uint32_t offset,
6302                                                                 Location temp,
6303                                                                 bool needs_null_check) {
6304   DCHECK(kEmitCompilerReadBarrier);
6305   DCHECK(kUseBakerReadBarrier);
6306 
6307   // /* HeapReference<Object> */ ref = *(obj + offset)
6308   Address src(obj, offset);
6309   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6310 }
6311 
6312 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6313                                                                 Location ref,
6314                                                                 CpuRegister obj,
6315                                                                 uint32_t data_offset,
6316                                                                 Location index,
6317                                                                 Location temp,
6318                                                                 bool needs_null_check) {
6319   DCHECK(kEmitCompilerReadBarrier);
6320   DCHECK(kUseBakerReadBarrier);
6321 
6322   // /* HeapReference<Object> */ ref =
6323   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6324   Address src = index.IsConstant() ?
6325       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6326       Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
6327   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6328 }
6329 
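// For example (using a hypothetical data_offset of 12 for the first array
// element), the source address above resolves to one of:
//
//   index is the constant 3:  src = Address(obj, 3 * 4 + 12) = 24(%obj)
//   index is in a register:   src = Address(obj, index, TIMES_4, 12) = 12(%obj, %index, 4)
//
// before being handed to GenerateReferenceLoadWithBakerReadBarrier.
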
6330 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6331                                                                     Location ref,
6332                                                                     CpuRegister obj,
6333                                                                     const Address& src,
6334                                                                     Location temp,
6335                                                                     bool needs_null_check) {
6336   DCHECK(kEmitCompilerReadBarrier);
6337   DCHECK(kUseBakerReadBarrier);
6338 
6339   // In slow path based read barriers, the read barrier call is
6340   // inserted after the original load. However, in fast path based
6341   // Baker's read barriers, we need to perform the load of
6342   // mirror::Object::monitor_ *before* the original reference load.
6343   // This load-load ordering is required by the read barrier.
6344   // The fast path/slow path (for Baker's algorithm) should look like:
6345   //
6346   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6347   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6348   //   HeapReference<Object> ref = *src;  // Original reference load.
6349   //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6350   //   if (is_gray) {
6351   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6352   //   }
6353   //
6354   // Note: the original implementation in ReadBarrier::Barrier is
6355   // slightly more complex as:
6356   // - it implements the load-load fence using a data dependency on
6357   //   the high-bits of rb_state, which are expected to be all zeroes
6358   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6359   //   here, which is a no-op thanks to the x86-64 memory model);
6360   // - it performs additional checks that we do not do here for
6361   //   performance reasons.
6362 
6363   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6364   CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6365   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6366 
6367   // /* int32_t */ monitor = obj->monitor_
6368   __ movl(temp_reg, Address(obj, monitor_offset));
6369   if (needs_null_check) {
6370     MaybeRecordImplicitNullCheck(instruction);
6371   }
6372   // /* LockWord */ lock_word = LockWord(monitor)
6373   static_assert(sizeof(LockWord) == sizeof(int32_t),
6374                 "art::LockWord and int32_t have different sizes.");
6375   // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6376   __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6377   __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6378   static_assert(
6379       LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6380       "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6381 
6382   // Load fence to prevent load-load reordering.
6383   // Note that this is a no-op, thanks to the x86-64 memory model.
6384   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6385 
6386   // The actual reference load.
6387   // /* HeapReference<Object> */ ref = *src
6388   __ movl(ref_reg, src);
6389 
6390   // Object* ref = ref_addr->AsMirrorPtr()
6391   __ MaybeUnpoisonHeapReference(ref_reg);
6392 
6393   // Slow path used to mark the object `ref` when it is gray.
6394   SlowPathCode* slow_path =
6395       new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6396   AddSlowPath(slow_path);
6397 
6398   // if (rb_state == ReadBarrier::gray_ptr_)
6399   //   ref = ReadBarrier::Mark(ref);
6400   __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6401   __ j(kEqual, slow_path->GetEntryLabel());
6402   __ Bind(slow_path->GetExitLabel());
6403 }
6404 
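// The code above corresponds roughly to this instruction sequence (register
// names are placeholders, constants symbolic; nothing is emitted for the
// load fence since it is a no-op on x86-64):
//
//   movl MONITOR_OFFSET(%obj), %temp       // temp = obj->monitor_
//   shrl $kReadBarrierStateShift, %temp
//   andl $kReadBarrierStateMask, %temp     // temp = rb_state
//   movl <src>, %ref                       // the actual reference load
//   cmpl $gray_ptr_, %temp
//   je   <ReadBarrierMark slow path>       // if gray, ref = Mark(ref)
//   <slow path exit label>
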
6405 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6406                                                   Location out,
6407                                                   Location ref,
6408                                                   Location obj,
6409                                                   uint32_t offset,
6410                                                   Location index) {
6411   DCHECK(kEmitCompilerReadBarrier);
6412 
6413   // Insert a slow path based read barrier *after* the reference load.
6414   //
6415   // If heap poisoning is enabled, the unpoisoning of the loaded
6416   // reference will be carried out by the runtime within the slow
6417   // path.
6418   //
6419   // Note that `ref` currently does not get unpoisoned (when heap
6420   // poisoning is enabled), which is alright as the `ref` argument is
6421   // not used by the artReadBarrierSlow entry point.
6422   //
6423   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6424   SlowPathCode* slow_path = new (GetGraph()->GetArena())
6425       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6426   AddSlowPath(slow_path);
6427 
6428   __ jmp(slow_path->GetEntryLabel());
6429   __ Bind(slow_path->GetExitLabel());
6430 }
6431 
6432 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6433                                                        Location out,
6434                                                        Location ref,
6435                                                        Location obj,
6436                                                        uint32_t offset,
6437                                                        Location index) {
6438   if (kEmitCompilerReadBarrier) {
6439     // Baker's read barriers shall be handled by the fast path
6440     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6441     DCHECK(!kUseBakerReadBarrier);
6442     // If heap poisoning is enabled, unpoisoning will be taken care of
6443     // by the runtime within the slow path.
6444     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6445   } else if (kPoisonHeapReferences) {
6446     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6447   }
6448 }
6449 
6450 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6451                                                          Location out,
6452                                                          Location root) {
6453   DCHECK(kEmitCompilerReadBarrier);
6454 
6455   // Insert a slow path based read barrier *after* the GC root load.
6456   //
6457   // Note that GC roots are not affected by heap poisoning, so we do
6458   // not need to do anything special for this here.
6459   SlowPathCode* slow_path =
6460       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6461   AddSlowPath(slow_path);
6462 
6463   __ jmp(slow_path->GetEntryLabel());
6464   __ Bind(slow_path->GetExitLabel());
6465 }
6466 
6467 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6468   // Nothing to do, this should be removed during prepare for register allocator.
6469   LOG(FATAL) << "Unreachable";
6470 }
6471 
6472 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6473   // Nothing to do, this should be removed during prepare for register allocator.
6474   LOG(FATAL) << "Unreachable";
6475 }
6476 
6477 // Simple implementation of packed switch - generate cascaded compare/jumps.
6478 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479   LocationSummary* locations =
6480       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6481   locations->SetInAt(0, Location::RequiresRegister());
6482   locations->AddTemp(Location::RequiresRegister());
6483   locations->AddTemp(Location::RequiresRegister());
6484 }
6485 
6486 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6487   int32_t lower_bound = switch_instr->GetStartValue();
6488   uint32_t num_entries = switch_instr->GetNumEntries();
6489   LocationSummary* locations = switch_instr->GetLocations();
6490   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6491   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6492   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6493   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6494 
6495   // Should we generate smaller inline compare/jumps?
6496   if (num_entries <= kPackedSwitchJumpTableThreshold) {
6497     // Figure out the correct compare values and jump conditions.
6498     // Handle the first compare/branch as a special case because it might
6499     // jump to the default case.
6500     DCHECK_GT(num_entries, 2u);
6501     Condition first_condition;
6502     uint32_t index;
6503     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6504     if (lower_bound != 0) {
6505       first_condition = kLess;
6506       __ cmpl(value_reg_in, Immediate(lower_bound));
6507       __ j(first_condition, codegen_->GetLabelOf(default_block));
6508       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6509 
6510       index = 1;
6511     } else {
6512       // Handle all the compare/jumps below.
6513       first_condition = kBelow;
6514       index = 0;
6515     }
6516 
6517     // Handle the rest of the compare/jumps.
6518     for (; index + 1 < num_entries; index += 2) {
6519       int32_t compare_to_value = lower_bound + index + 1;
6520       __ cmpl(value_reg_in, Immediate(compare_to_value));
6521       // Jump to successors[index] if value < case_value[index].
6522       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6523       // Jump to successors[index + 1] if value == case_value[index + 1].
6524       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6525     }
6526 
6527     if (index != num_entries) {
6528       // There is an odd number of entries. Handle the last one.
6529       DCHECK_EQ(index + 1, num_entries);
6530       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6531       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6532     }
6533 
6534     // And the default for any other value.
6535     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6536       __ jmp(codegen_->GetLabelOf(default_block));
6537     }
6538     return;
6539   }
6540 
6541   // Remove the bias, if needed.
6542   Register value_reg_out = value_reg_in.AsRegister();
6543   if (lower_bound != 0) {
6544     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6545     value_reg_out = temp_reg.AsRegister();
6546   }
6547   CpuRegister value_reg(value_reg_out);
6548 
6549   // Is the value in range?
6550   __ cmpl(value_reg, Immediate(num_entries - 1));
6551   __ j(kAbove, codegen_->GetLabelOf(default_block));
6552 
6553   // We are in the range of the table.
6554   // Load the address of the jump table in the constant area.
6555   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6556 
6557   // Load the (signed) offset from the jump table.
6558   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6559 
6560   // Add the offset to the address of the table base.
6561   __ addq(temp_reg, base_reg);
6562 
6563   // And jump.
6564   __ jmp(temp_reg);
6565 }
6566 
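// Worked example of the compare/jump cascade above, assuming a hypothetical
// switch with lower_bound = 10 and num_entries = 4 (successors s0..s3):
//
//   cmpl $10, %val ; jl <default> ; je <s0>    // value < 10 -> default, == 10 -> s0
//   cmpl $12, %val ; jl <s1> ; je <s2>         // value == 11 -> s1, == 12 -> s2
//   cmpl $13, %val ; je <s3>                   // value == 13 -> s3
//   jmp  <default>                             // anything else
//
// Above the threshold, the jump table path instead emits a bounds check
// followed by leaq/movsxd/addq/jmp through the RIP-relative case table.
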
6567 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6568   if (value == 0) {
6569     __ xorl(dest, dest);
6570   } else {
6571     __ movl(dest, Immediate(value));
6572   }
6573 }
6574 
6575 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6576   if (value == 0) {
6577     // Clears upper bits too.
6578     __ xorl(dest, dest);
6579   } else if (IsUint<32>(value)) {
6580     // We can use a 32 bit move, as it will zero-extend and is shorter.
6581     __ movl(dest, Immediate(static_cast<int32_t>(value)));
6582   } else {
6583     __ movq(dest, Immediate(value));
6584   }
6585 }
6586 
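// For example (hypothetical destination register RAX):
//
//   value == 0:                  xorl %eax, %eax         // also clears the upper 32 bits
//   value == 0x00000000ffffffff: movl $0xffffffff, %eax  // movl zero-extends to 64 bits
//   value == -1:                 movq $-1, %rax          // needs a 64-bit move; movl would
//                                                        // zero-extend instead of sign-extend
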
6587 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6588   if (value == 0) {
6589     __ xorps(dest, dest);
6590   } else {
6591     __ movss(dest, LiteralInt32Address(value));
6592   }
6593 }
6594 
6595 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6596   if (value == 0) {
6597     __ xorpd(dest, dest);
6598   } else {
6599     __ movsd(dest, LiteralInt64Address(value));
6600   }
6601 }
6602 
6603 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6604   Load32BitValue(dest, bit_cast<int32_t, float>(value));
6605 }
6606 
6607 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6608   Load64BitValue(dest, bit_cast<int64_t, double>(value));
6609 }
6610 
6611 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6612   if (value == 0) {
6613     __ testl(dest, dest);
6614   } else {
6615     __ cmpl(dest, Immediate(value));
6616   }
6617 }
6618 
6619 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6620   if (IsInt<32>(value)) {
6621     if (value == 0) {
6622       __ testq(dest, dest);
6623     } else {
6624       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6625     }
6626   } else {
6627     // Value won't fit in a 32-bit immediate.
6628     __ cmpq(dest, LiteralInt64Address(value));
6629   }
6630 }
6631 
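// For example (hypothetical register RAX):
//
//   value == 0:              testq %rax, %rax        // shorter than comparing against 0
//   value fits in 32 bits:   cmpq  $0x1234, %rax
//   value needs 64 bits:     cmpq  0x56(%rip), %rax  // literal in the constant area
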
6632 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6633   DCHECK(dest.IsDoubleStackSlot());
6634   if (IsInt<32>(value)) {
6635     // Can move directly as an int32 constant.
6636     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6637             Immediate(static_cast<int32_t>(value)));
6638   } else {
6639     Load64BitValue(CpuRegister(TMP), value);
6640     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6641   }
6642 }
6643 
6644 /**
6645  * Class to handle late fixup of offsets into the constant area.
6646  */
6647 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6648  public:
6649   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6650       : codegen_(&codegen), offset_into_constant_area_(offset) {}
6651 
6652  protected:
6653   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6654 
6655   CodeGeneratorX86_64* codegen_;
6656 
6657  private:
6658   void Process(const MemoryRegion& region, int pos) OVERRIDE {
6659     // Patch the correct offset for the instruction.  We use the address of the
6660     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6661     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6662     int32_t relative_position = constant_offset - pos;
6663 
6664     // Patch in the right value.
6665     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
6666   }
6667 
6668   // Location in constant area that the fixup refers to.
6669   size_t offset_into_constant_area_;
6670 };
6671 
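// Worked example of the fixup arithmetic in RIPFixup::Process, with made-up
// numbers: suppose the constant area starts at code offset 0x400, the literal
// sits 0x10 bytes into it, and the RIP-relative instruction ends at pos 0x120.
// Then relative_position = (0x400 + 0x10) - 0x120 = 0x2F0, stored at pos - 4
// (the 4-byte displacement field).  At run time RIP points at the next
// instruction, i.e. at pos, so RIP + 0x2F0 addresses the literal.
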
6672 /**
6673  * Class to handle late fixup of offsets to a jump table that will be created in the
6674  * constant area.
6675  */
6676 class JumpTableRIPFixup : public RIPFixup {
6677  public:
6678   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6679       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6680 
6681   void CreateJumpTable() {
6682     X86_64Assembler* assembler = codegen_->GetAssembler();
6683 
6684     // Ensure that the reference to the jump table has the correct offset.
6685     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6686     SetOffset(offset_in_constant_table);
6687 
6688     // Compute the offset from the start of the function to this jump table.
6689     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6690 
6691     // Populate the jump table with the target offsets for each case.
6692     int32_t num_entries = switch_instr_->GetNumEntries();
6693     HBasicBlock* block = switch_instr_->GetBlock();
6694     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6695     // The value that we want is the target offset - the position of the table.
6696     for (int32_t i = 0; i < num_entries; i++) {
6697       HBasicBlock* b = successors[i];
6698       Label* l = codegen_->GetLabelOf(b);
6699       DCHECK(l->IsBound());
6700       int32_t offset_to_block = l->Position() - current_table_offset;
6701       assembler->AppendInt32(offset_to_block);
6702     }
6703   }
6704 
6705  private:
6706   const HPackedSwitch* switch_instr_;
6707 };
6708 
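// Example of the jump table entries written by CreateJumpTable, with made-up
// offsets: if the table is placed at function offset 0x200 and a successor's
// label is bound at offset 0x80, the stored entry is 0x80 - 0x200 = -0x180.
// VisitPackedSwitch later reloads it with movsxd and adds the table base back,
// recovering the absolute target before the indirect jmp.
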
6709 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6710   // Generate the constant area if needed.
6711   X86_64Assembler* assembler = GetAssembler();
6712   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6713     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
6714     assembler->Align(4, 0);
6715     constant_area_start_ = assembler->CodeSize();
6716 
6717     // Populate any jump tables.
6718     for (auto jump_table : fixups_to_jump_tables_) {
6719       jump_table->CreateJumpTable();
6720     }
6721 
6722     // And now add the constant area to the generated code.
6723     assembler->AddConstantArea();
6724   }
6725 
6726   // And finish up.
6727   CodeGenerator::Finalize(allocator);
6728 }
6729 
6730 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6731   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6732   return Address::RIP(fixup);
6733 }
6734 
6735 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6736   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6737   return Address::RIP(fixup);
6738 }
6739 
6740 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6741   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6742   return Address::RIP(fixup);
6743 }
6744 
6745 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6746   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6747   return Address::RIP(fixup);
6748 }
6749 
6750 // TODO: trg as memory.
6751 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6752   if (!trg.IsValid()) {
6753     DCHECK_EQ(type, Primitive::kPrimVoid);
6754     return;
6755   }
6756 
6757   DCHECK_NE(type, Primitive::kPrimVoid);
6758 
6759   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6760   if (trg.Equals(return_loc)) {
6761     return;
6762   }
6763 
6764   // Let the parallel move resolver take care of all of this.
6765   HParallelMove parallel_move(GetGraph()->GetArena());
6766   parallel_move.AddMove(return_loc, trg, type, nullptr);
6767   GetMoveResolver()->EmitNativeCode(&parallel_move);
6768 }
6769 
6770 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6771   // Create a fixup to be used to create and address the jump table.
6772   JumpTableRIPFixup* table_fixup =
6773       new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6774 
6775   // We have to populate the jump tables.
6776   fixups_to_jump_tables_.push_back(table_fixup);
6777   return Address::RIP(table_fixup);
6778 }
6779 
6780 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6781                                              const Address& addr_high,
6782                                              int64_t v,
6783                                              HInstruction* instruction) {
6784   if (IsInt<32>(v)) {
6785     int32_t v_32 = v;
6786     __ movq(addr_low, Immediate(v_32));
6787     MaybeRecordImplicitNullCheck(instruction);
6788   } else {
6789     // Value does not fit in a 32-bit immediate.  Do it in pieces.
6790     int32_t low_v = Low32Bits(v);
6791     int32_t high_v = High32Bits(v);
6792     __ movl(addr_low, Immediate(low_v));
6793     MaybeRecordImplicitNullCheck(instruction);
6794     __ movl(addr_high, Immediate(high_v));
6795   }
6796 }
6797 
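// For example (hypothetical field at offset 8 of the object in RDI):
//
//   v == -5 (fits in int32):   movq $-5, 8(%rdi)    // single 64-bit store
//   v == 0x0000000100000000:   movl $0, 8(%rdi)     // low 32 bits
//                              movl $1, 12(%rdi)    // high 32 bits
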
6798 #undef __
6799 
6800 }  // namespace x86_64
6801 }  // namespace art
6802