1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_root-inl.h"
22 #include "class_table.h"
23 #include "code_generator_utils.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "interpreter/mterp/nterp.h"
29 #include "intrinsics.h"
30 #include "intrinsics_utils.h"
31 #include "intrinsics_x86_64.h"
32 #include "jit/profiling_info.h"
33 #include "linker/linker_patch.h"
34 #include "lock_word.h"
35 #include "mirror/array-inl.h"
36 #include "mirror/class-inl.h"
37 #include "mirror/object_reference.h"
38 #include "mirror/var_handle.h"
39 #include "optimizing/nodes.h"
40 #include "scoped_thread_state_change-inl.h"
41 #include "thread.h"
42 #include "utils/assembler.h"
43 #include "utils/stack_checks.h"
44 #include "utils/x86_64/assembler_x86_64.h"
45 #include "utils/x86_64/constants_x86_64.h"
46 #include "utils/x86_64/managed_register_x86_64.h"
47
48 namespace art HIDDEN {
49
50 template<class MirrorType>
51 class GcRoot;
52
53 namespace x86_64 {
54
55 static constexpr int kCurrentMethodStackOffset = 0;
56 static constexpr Register kMethodRegisterArgument = RDI;
57 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
58 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
59 // generates less code/data with a small num_entries.
60 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
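// Illustrative sketch of the trade-off (not the exact instruction selection used later in this
// file): with switch base B, the compare/jump lowering emits roughly one cmp/jcc pair per case,
//
//   cmpl value_reg, Immediate(B + 0);  je case_0
//   cmpl value_reg, Immediate(B + 1);  je case_1
//   ...
//
// whereas the jump-table lowering emits a fixed preamble (bounds check, table address, 32-bit
// entry load, add, indirect jmp) plus one table literal per case.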
61
62 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
63 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
64
65 static constexpr int kC2ConditionMask = 0x400;
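// Background note (stated as an assumption for readability): 0x400 is bit 10 of the x87 FPU
// status word, i.e. the C2 condition flag. After FPREM, a set C2 means the partial remainder is
// incomplete and the instruction must be retried; code using this mask tests for that condition.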
66
67 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
68 // Custom calling convention: RAX serves as both input and output.
69 RegisterSet caller_saves = RegisterSet::Empty();
70 caller_saves.Add(Location::RegisterLocation(RAX));
71 return caller_saves;
72 }
73
74 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
75 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
76 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
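// Hedged usage sketch: on x86-64 the quick entrypoints sit at fixed offsets from the Thread
// object, which is reachable through the GS segment, so a direct runtime call generally looks
// like
//
//   __ gs()->call(Address::Absolute(QUICK_ENTRY_POINT(pTestSuspend), /* no_rip= */ true));
//
// (entrypoint name picked for illustration only; real call sites go through InvokeRuntime,
// which also records the PC info).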
77
78 class NullCheckSlowPathX86_64 : public SlowPathCode {
79 public:
80 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
81
82 void EmitNativeCode(CodeGenerator* codegen) override {
83 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
84 __ Bind(GetEntryLabel());
85 if (instruction_->CanThrowIntoCatchBlock()) {
86 // Live registers will be restored in the catch block if caught.
87 SaveLiveRegisters(codegen, instruction_->GetLocations());
88 }
89 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
90 instruction_,
91 instruction_->GetDexPc(),
92 this);
93 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
94 }
95
96 bool IsFatal() const override { return true; }
97
98 const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
99
100 private:
101 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
102 };
103
104 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
105 public:
106 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
107
108 void EmitNativeCode(CodeGenerator* codegen) override {
109 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
110 __ Bind(GetEntryLabel());
111 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
112 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
113 }
114
115 bool IsFatal() const override { return true; }
116
117 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
118
119 private:
120 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
121 };
122
123 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
124 public:
125 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
126 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
127
128 void EmitNativeCode(CodeGenerator* codegen) override {
129 __ Bind(GetEntryLabel());
130 if (type_ == DataType::Type::kInt32) {
131 if (is_div_) {
132 __ negl(cpu_reg_);
133 } else {
134 __ xorl(cpu_reg_, cpu_reg_);
135 }
136
137 } else {
138 DCHECK_EQ(DataType::Type::kInt64, type_);
139 if (is_div_) {
140 __ negq(cpu_reg_);
141 } else {
142 __ xorl(cpu_reg_, cpu_reg_);
143 }
144 }
145 __ jmp(GetExitLabel());
146 }
147
148 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
149
150 private:
151 const CpuRegister cpu_reg_;
152 const DataType::Type type_;
153 const bool is_div_;
154 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
155 };
156
157 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
158 public:
159 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
160 : SlowPathCode(instruction), successor_(successor) {}
161
162 void EmitNativeCode(CodeGenerator* codegen) override {
163 LocationSummary* locations = instruction_->GetLocations();
164 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
165 __ Bind(GetEntryLabel());
166 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
167 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
168 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
169 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
170 if (successor_ == nullptr) {
171 __ jmp(GetReturnLabel());
172 } else {
173 __ jmp(x86_64_codegen->GetLabelOf(successor_));
174 }
175 }
176
177 Label* GetReturnLabel() {
178 DCHECK(successor_ == nullptr);
179 return &return_label_;
180 }
181
182 HBasicBlock* GetSuccessor() const {
183 return successor_;
184 }
185
186 const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
187
188 private:
189 HBasicBlock* const successor_;
190 Label return_label_;
191
192 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
193 };
194
195 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
196 public:
197 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
198 : SlowPathCode(instruction) {}
199
200 void EmitNativeCode(CodeGenerator* codegen) override {
201 LocationSummary* locations = instruction_->GetLocations();
202 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
203 __ Bind(GetEntryLabel());
204 if (instruction_->CanThrowIntoCatchBlock()) {
205 // Live registers will be restored in the catch block if caught.
206 SaveLiveRegisters(codegen, locations);
207 }
208
209 Location index_loc = locations->InAt(0);
210 Location length_loc = locations->InAt(1);
211 InvokeRuntimeCallingConvention calling_convention;
212 Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
213 Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
214
215 // Are we using an array length from memory?
216 if (!length_loc.IsValid()) {
217 DCHECK(instruction_->InputAt(1)->IsArrayLength());
218 HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
219 DCHECK(array_length->IsEmittedAtUseSite());
220 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
221 Location array_loc = array_length->GetLocations()->InAt(0);
222 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
223 if (!index_loc.Equals(length_arg)) {
224 // The index is not clobbered by loading the length directly to `length_arg`.
225 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
226 x86_64_codegen->Move(index_arg, index_loc);
227 } else if (!array_loc.Equals(index_arg)) {
228 // The array reference is not clobbered by the index move.
229 x86_64_codegen->Move(index_arg, index_loc);
230 __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
231 } else {
232 // Load the array length into `TMP`.
233 DCHECK(codegen->IsBlockedCoreRegister(TMP));
234 __ movl(CpuRegister(TMP), array_len);
235 // Single move to CPU register does not clobber `TMP`.
236 x86_64_codegen->Move(index_arg, index_loc);
237 __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
238 }
239 if (mirror::kUseStringCompression && array_length->IsStringLength()) {
240 __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
241 }
242 } else {
243 // We're moving two locations to locations that could overlap,
244 // so we need a parallel move resolver.
245 codegen->EmitParallelMoves(
246 index_loc,
247 index_arg,
248 DataType::Type::kInt32,
249 length_loc,
250 length_arg,
251 DataType::Type::kInt32);
252 }
253
254 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
255 ? kQuickThrowStringBounds
256 : kQuickThrowArrayBounds;
257 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
258 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
259 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
260 }
261
262 bool IsFatal() const override { return true; }
263
264 const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
265
266 private:
267 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
268 };
269
270 class LoadClassSlowPathX86_64 : public SlowPathCode {
271 public:
272 LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
273 : SlowPathCode(at), cls_(cls) {
274 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
275 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
276 }
277
278 void EmitNativeCode(CodeGenerator* codegen) override {
279 LocationSummary* locations = instruction_->GetLocations();
280 Location out = locations->Out();
281 const uint32_t dex_pc = instruction_->GetDexPc();
282 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
283 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
284
285 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
286 __ Bind(GetEntryLabel());
287 SaveLiveRegisters(codegen, locations);
288
289 // Custom calling convention: RAX serves as both input and output.
290 if (must_resolve_type) {
291 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
292 x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
293 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
294 &cls_->GetDexFile()));
295 dex::TypeIndex type_index = cls_->GetTypeIndex();
296 __ movl(CpuRegister(RAX), Immediate(type_index.index_));
297 if (cls_->NeedsAccessCheck()) {
298 CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
299 x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
300 } else {
301 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
302 x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
303 }
304 // If we also must_do_clinit, the resolved type is now in the correct register.
305 } else {
306 DCHECK(must_do_clinit);
307 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
308 x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
309 }
310 if (must_do_clinit) {
311 x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
312 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
313 }
314
315 // Move the class to the desired location.
316 if (out.IsValid()) {
317 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
318 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
319 }
320
321 RestoreLiveRegisters(codegen, locations);
322 __ jmp(GetExitLabel());
323 }
324
325 const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
326
327 private:
328 // The class this slow path will load.
329 HLoadClass* const cls_;
330
331 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
332 };
333
334 class LoadStringSlowPathX86_64 : public SlowPathCode {
335 public:
336 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
337
338 void EmitNativeCode(CodeGenerator* codegen) override {
339 LocationSummary* locations = instruction_->GetLocations();
340 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
341
342 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
343 __ Bind(GetEntryLabel());
344 SaveLiveRegisters(codegen, locations);
345
346 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
347 // Custom calling convention: RAX serves as both input and output.
348 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
349 x86_64_codegen->InvokeRuntime(kQuickResolveString,
350 instruction_,
351 instruction_->GetDexPc(),
352 this);
353 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
354 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
355 RestoreLiveRegisters(codegen, locations);
356
357 __ jmp(GetExitLabel());
358 }
359
360 const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
361
362 private:
363 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
364 };
365
366 class TypeCheckSlowPathX86_64 : public SlowPathCode {
367 public:
368 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
369 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
370
371 void EmitNativeCode(CodeGenerator* codegen) override {
372 LocationSummary* locations = instruction_->GetLocations();
373 uint32_t dex_pc = instruction_->GetDexPc();
374 DCHECK(instruction_->IsCheckCast()
375 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
376
377 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
378 __ Bind(GetEntryLabel());
379
380 if (kPoisonHeapReferences &&
381 instruction_->IsCheckCast() &&
382 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
383 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
384 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
385 }
386
387 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
388 SaveLiveRegisters(codegen, locations);
389 }
390
391 // We're moving two locations to locations that could overlap, so we need a parallel
392 // move resolver.
393 InvokeRuntimeCallingConvention calling_convention;
394 codegen->EmitParallelMoves(locations->InAt(0),
395 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
396 DataType::Type::kReference,
397 locations->InAt(1),
398 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
399 DataType::Type::kReference);
400 if (instruction_->IsInstanceOf()) {
401 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
402 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
403 } else {
404 DCHECK(instruction_->IsCheckCast());
405 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
406 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
407 }
408
409 if (!is_fatal_) {
410 if (instruction_->IsInstanceOf()) {
411 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
412 }
413
414 RestoreLiveRegisters(codegen, locations);
415 __ jmp(GetExitLabel());
416 }
417 }
418
419 const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
420
421 bool IsFatal() const override { return is_fatal_; }
422
423 private:
424 const bool is_fatal_;
425
426 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
427 };
428
429 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
430 public:
431 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
432 : SlowPathCode(instruction) {}
433
434 void EmitNativeCode(CodeGenerator* codegen) override {
435 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
436 __ Bind(GetEntryLabel());
437 LocationSummary* locations = instruction_->GetLocations();
438 SaveLiveRegisters(codegen, locations);
439 InvokeRuntimeCallingConvention calling_convention;
440 x86_64_codegen->Load32BitValue(
441 CpuRegister(calling_convention.GetRegisterAt(0)),
442 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
443 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
444 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
445 }
446
447 const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
448
449 private:
450 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
451 };
452
453 class ArraySetSlowPathX86_64 : public SlowPathCode {
454 public:
455 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
456
457 void EmitNativeCode(CodeGenerator* codegen) override {
458 LocationSummary* locations = instruction_->GetLocations();
459 __ Bind(GetEntryLabel());
460 SaveLiveRegisters(codegen, locations);
461
462 InvokeRuntimeCallingConvention calling_convention;
463 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
464 parallel_move.AddMove(
465 locations->InAt(0),
466 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
467 DataType::Type::kReference,
468 nullptr);
469 parallel_move.AddMove(
470 locations->InAt(1),
471 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
472 DataType::Type::kInt32,
473 nullptr);
474 parallel_move.AddMove(
475 locations->InAt(2),
476 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
477 DataType::Type::kReference,
478 nullptr);
479 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
480
481 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
482 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
483 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
484 RestoreLiveRegisters(codegen, locations);
485 __ jmp(GetExitLabel());
486 }
487
488 const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
489
490 private:
491 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
492 };
493
494 // Slow path marking an object reference `ref` during a read
495 // barrier. The field `obj.field` in the object `obj` holding this
496 // reference does not get updated by this slow path after marking (see
497 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
498 //
499 // This means that after the execution of this slow path, `ref` will
500 // always be up-to-date, but `obj.field` may not; i.e., after the
501 // flip, `ref` will be a to-space reference, but `obj.field` will
502 // probably still be a from-space reference (unless it gets updated by
503 // another thread, or if another thread installed another object
504 // reference (different from `ref`) in `obj.field`).
505 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
506 public:
507 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
508 Location ref,
509 bool unpoison_ref_before_marking)
510 : SlowPathCode(instruction),
511 ref_(ref),
512 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
513 DCHECK(gUseReadBarrier);
514 }
515
516 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
517
518 void EmitNativeCode(CodeGenerator* codegen) override {
519 LocationSummary* locations = instruction_->GetLocations();
520 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
521 Register ref_reg = ref_cpu_reg.AsRegister();
522 DCHECK(locations->CanCall());
523 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
524 DCHECK(instruction_->IsInstanceFieldGet() ||
525 instruction_->IsPredicatedInstanceFieldGet() ||
526 instruction_->IsStaticFieldGet() ||
527 instruction_->IsArrayGet() ||
528 instruction_->IsArraySet() ||
529 instruction_->IsLoadClass() ||
530 instruction_->IsLoadString() ||
531 instruction_->IsInstanceOf() ||
532 instruction_->IsCheckCast() ||
533 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
534 << "Unexpected instruction in read barrier marking slow path: "
535 << instruction_->DebugName();
536
537 __ Bind(GetEntryLabel());
538 if (unpoison_ref_before_marking_) {
539 // Object* ref = ref_addr->AsMirrorPtr()
540 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
541 }
542 // No need to save live registers; it's taken care of by the
543 // entrypoint. Also, there is no need to update the stack mask,
544 // as this runtime call will not trigger a garbage collection.
545 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
546 DCHECK_NE(ref_reg, RSP);
547 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
548 // "Compact" slow path, saving two moves.
549 //
550 // Instead of using the standard runtime calling convention (input
551 // in RDI and output in RAX):
552 //
553 // RDI <- ref
554 // RAX <- ReadBarrierMark(RDI)
555 // ref <- RAX
556 //
557 // we just use rX (the register containing `ref`) as input and output
558 // of a dedicated entrypoint:
559 //
560 // rX <- ReadBarrierMarkRegX(rX)
561 //
562 int32_t entry_point_offset =
563 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
564 // This runtime call does not require a stack map.
565 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
566 __ jmp(GetExitLabel());
567 }
568
569 private:
570 // The location (register) of the marked object reference.
571 const Location ref_;
572 // Should the reference in `ref_` be unpoisoned prior to marking it?
573 const bool unpoison_ref_before_marking_;
574
575 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
576 };
577
578 // Slow path marking an object reference `ref` during a read barrier,
579 // and if needed, atomically updating the field `obj.field` in the
580 // object `obj` holding this reference after marking (contrary to
581 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
582 // `obj.field`).
583 //
584 // This means that after the execution of this slow path, both `ref`
585 // and `obj.field` will be up-to-date; i.e., after the flip, both will
586 // hold the same to-space reference (unless another thread installed
587 // another object reference (different from `ref`) in `obj.field`).
588 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
589 public:
590 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
591 Location ref,
592 CpuRegister obj,
593 const Address& field_addr,
594 bool unpoison_ref_before_marking,
595 CpuRegister temp1,
596 CpuRegister temp2)
597 : SlowPathCode(instruction),
598 ref_(ref),
599 obj_(obj),
600 field_addr_(field_addr),
601 unpoison_ref_before_marking_(unpoison_ref_before_marking),
602 temp1_(temp1),
603 temp2_(temp2) {
604 DCHECK(gUseReadBarrier);
605 }
606
607 const char* GetDescription() const override {
608 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
609 }
610
611 void EmitNativeCode(CodeGenerator* codegen) override {
612 LocationSummary* locations = instruction_->GetLocations();
613 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
614 Register ref_reg = ref_cpu_reg.AsRegister();
615 DCHECK(locations->CanCall());
616 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
617 DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
618 << "Unexpected instruction in read barrier marking and field updating slow path: "
619 << instruction_->DebugName();
620 HInvoke* invoke = instruction_->AsInvoke();
621 DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
622
623 __ Bind(GetEntryLabel());
624 if (unpoison_ref_before_marking_) {
625 // Object* ref = ref_addr->AsMirrorPtr()
626 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
627 }
628
629 // Save the old (unpoisoned) reference.
630 __ movl(temp1_, ref_cpu_reg);
631
632 // No need to save live registers; it's taken care of by the
633 // entrypoint. Also, there is no need to update the stack mask,
634 // as this runtime call will not trigger a garbage collection.
635 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
636 DCHECK_NE(ref_reg, RSP);
637 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
638 // "Compact" slow path, saving two moves.
639 //
640 // Instead of using the standard runtime calling convention (input
641 // in RDI and output in RAX):
642 //
643 // RDI <- ref
644 // RAX <- ReadBarrierMark(RDI)
645 // ref <- RAX
646 //
647 // we just use rX (the register containing `ref`) as input and output
648 // of a dedicated entrypoint:
649 //
650 // rX <- ReadBarrierMarkRegX(rX)
651 //
652 int32_t entry_point_offset =
653 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
654 // This runtime call does not require a stack map.
655 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
656
657 // If the new reference is different from the old reference,
658 // update the field in the holder (`*field_addr`).
659 //
660 // Note that this field could also hold a different object, if
661 // another thread had concurrently changed it. In that case, the
662 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
663 // operation below would abort the CAS, leaving the field as-is.
664 NearLabel done;
665 __ cmpl(temp1_, ref_cpu_reg);
666 __ j(kEqual, &done);
667
668 // Update the holder's field atomically. This may fail if the
669 // mutator updates before us, but it's OK. This is achieved
670 // using a strong compare-and-set (CAS) operation with relaxed
671 // memory synchronization ordering, where the expected value is
672 // the old reference and the desired value is the new reference.
673 // This operation is implemented with a 32-bit LOCK CMPXCHG
674 // instruction, which requires the expected value (the old
675 // reference) to be in EAX. Save RAX beforehand, and move the
676 // expected value (stored in `temp1_`) into EAX.
677 __ movq(temp2_, CpuRegister(RAX));
678 __ movl(CpuRegister(RAX), temp1_);
679
680 // Convenience aliases.
681 CpuRegister base = obj_;
682 CpuRegister expected = CpuRegister(RAX);
683 CpuRegister value = ref_cpu_reg;
684
685 bool base_equals_value = (base.AsRegister() == value.AsRegister());
686 Register value_reg = ref_reg;
687 if (kPoisonHeapReferences) {
688 if (base_equals_value) {
689 // If `base` and `value` are the same register location, move
690 // `value_reg` to a temporary register. This way, poisoning
691 // `value_reg` won't invalidate `base`.
692 value_reg = temp1_.AsRegister();
693 __ movl(CpuRegister(value_reg), base);
694 }
695
696 // Check that the register allocator did not assign the location
697 // of `expected` (RAX) to `value` nor to `base`, so that heap
698 // poisoning (when enabled) works as intended below.
699 // - If `value` were equal to `expected`, both references would
700 // be poisoned twice, meaning they would not be poisoned at
701 // all, as heap poisoning uses address negation.
702 // - If `base` were equal to `expected`, poisoning `expected`
703 // would invalidate `base`.
704 DCHECK_NE(value_reg, expected.AsRegister());
705 DCHECK_NE(base.AsRegister(), expected.AsRegister());
706
707 __ PoisonHeapReference(expected);
708 __ PoisonHeapReference(CpuRegister(value_reg));
709 }
710
711 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
712
713 // If heap poisoning is enabled, we need to unpoison the values
714 // that were poisoned earlier.
715 if (kPoisonHeapReferences) {
716 if (base_equals_value) {
717 // `value_reg` has been moved to a temporary register, no need
718 // to unpoison it.
719 } else {
720 __ UnpoisonHeapReference(CpuRegister(value_reg));
721 }
722 // No need to unpoison `expected` (RAX), as it is overwritten below.
723 }
724
725 // Restore RAX.
726 __ movq(CpuRegister(RAX), temp2_);
727
728 __ Bind(&done);
729 __ jmp(GetExitLabel());
730 }
731
732 private:
733 // The location (register) of the marked object reference.
734 const Location ref_;
735 // The register containing the object holding the marked object reference field.
736 const CpuRegister obj_;
737 // The address of the marked reference field. The base of this address must be `obj_`.
738 const Address field_addr_;
739
740 // Should the reference in `ref_` be unpoisoned prior to marking it?
741 const bool unpoison_ref_before_marking_;
742
743 const CpuRegister temp1_;
744 const CpuRegister temp2_;
745
746 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
747 };
748
749 // Slow path generating a read barrier for a heap reference.
750 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
751 public:
752 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
753 Location out,
754 Location ref,
755 Location obj,
756 uint32_t offset,
757 Location index)
758 : SlowPathCode(instruction),
759 out_(out),
760 ref_(ref),
761 obj_(obj),
762 offset_(offset),
763 index_(index) {
764 DCHECK(gUseReadBarrier);
765 // If `obj` is equal to `out` or `ref`, it means the initial
766 // object has been overwritten by (or after) the heap object
767 // reference load to be instrumented, e.g.:
768 //
769 // __ movl(out, Address(out, offset));
770 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
771 //
772 // In that case, we have lost the information about the original
773 // object, and the emitted read barrier cannot work properly.
774 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
775 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
776 }
777
778 void EmitNativeCode(CodeGenerator* codegen) override {
779 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
780 LocationSummary* locations = instruction_->GetLocations();
781 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
782 DCHECK(locations->CanCall());
783 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
784 DCHECK(instruction_->IsInstanceFieldGet() ||
785 instruction_->IsPredicatedInstanceFieldGet() ||
786 instruction_->IsStaticFieldGet() ||
787 instruction_->IsArrayGet() ||
788 instruction_->IsInstanceOf() ||
789 instruction_->IsCheckCast() ||
790 (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
791 << "Unexpected instruction in read barrier for heap reference slow path: "
792 << instruction_->DebugName();
793
794 __ Bind(GetEntryLabel());
795 SaveLiveRegisters(codegen, locations);
796
797 // We may have to change the index's value, but as `index_` is a
798 // constant member (like other "inputs" of this slow path),
799 // introduce a copy of it, `index`.
800 Location index = index_;
801 if (index_.IsValid()) {
802 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
803 if (instruction_->IsArrayGet()) {
804 // Compute real offset and store it in index_.
805 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
806 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
807 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
808 // We are about to change the value of `index_reg` (see the
809 // calls to art::x86_64::X86_64Assembler::shll and
810 // art::x86_64::X86_64Assembler::AddImmediate below), but it
811 // has not been saved by the previous call to
812 // art::SlowPathCode::SaveLiveRegisters, as it is a
813 // callee-save register --
814 // art::SlowPathCode::SaveLiveRegisters does not consider
815 // callee-save registers, as it has been designed with the
816 // assumption that callee-save registers are supposed to be
817 // handled by the called function. So, as a callee-save
818 // register, `index_reg` _would_ eventually be saved onto
819 // the stack, but it would be too late: we would have
820 // changed its value earlier. Therefore, we manually save
821 // it here into another freely available register,
822 // `free_reg`, chosen of course among the caller-save
823 // registers (as a callee-save `free_reg` register would
824 // exhibit the same problem).
825 //
826 // Note we could have requested a temporary register from
827 // the register allocator instead; but we prefer not to, as
828 // this is a slow path, and we know we can find a
829 // caller-save register that is available.
830 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
831 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
832 index_reg = free_reg;
833 index = Location::RegisterLocation(index_reg);
834 } else {
835 // The initial register stored in `index_` has already been
836 // saved in the call to art::SlowPathCode::SaveLiveRegisters
837 // (as it is not a callee-save register), so we can freely
838 // use it.
839 }
840 // Shifting the index value contained in `index_reg` by the
841 // scale factor (2) cannot overflow in practice, as the
842 // runtime is unable to allocate object arrays with a size
843 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
844 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
845 static_assert(
846 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
847 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
848 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
849 } else {
850 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
851 // intrinsics, `index_` is not shifted by a scale factor of 2
852 // (as in the case of ArrayGet), as it is actually an offset
853 // to an object field within an object.
854 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
855 DCHECK(instruction_->GetLocations()->Intrinsified());
856 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
857 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
858 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
859 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
860 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
861 << instruction_->AsInvoke()->GetIntrinsic();
862 DCHECK_EQ(offset_, 0U);
863 DCHECK(index_.IsRegister());
864 }
865 }
866
867 // We're moving two or three locations to locations that could
868 // overlap, so we need a parallel move resolver.
869 InvokeRuntimeCallingConvention calling_convention;
870 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
871 parallel_move.AddMove(ref_,
872 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
873 DataType::Type::kReference,
874 nullptr);
875 parallel_move.AddMove(obj_,
876 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
877 DataType::Type::kReference,
878 nullptr);
879 if (index.IsValid()) {
880 parallel_move.AddMove(index,
881 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
882 DataType::Type::kInt32,
883 nullptr);
884 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
885 } else {
886 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
887 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
888 }
889 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
890 instruction_,
891 instruction_->GetDexPc(),
892 this);
893 CheckEntrypointTypes<
894 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
895 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
896
897 RestoreLiveRegisters(codegen, locations);
898 __ jmp(GetExitLabel());
899 }
900
901 const char* GetDescription() const override {
902 return "ReadBarrierForHeapReferenceSlowPathX86_64";
903 }
904
905 private:
906 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
907 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
908 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
909 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
910 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
911 return static_cast<CpuRegister>(i);
912 }
913 }
914 // We shall never fail to find a free caller-save register, as
915 // there are more than two core caller-save registers on x86-64
916 // (meaning it is possible to find one which is different from
917 // `ref` and `obj`).
918 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
919 LOG(FATAL) << "Could not find a free caller-save register";
920 UNREACHABLE();
921 }
922
923 const Location out_;
924 const Location ref_;
925 const Location obj_;
926 const uint32_t offset_;
927 // An additional location containing an index to an array.
928 // Only used for HArrayGet and the UnsafeGetObject &
929 // UnsafeGetObjectVolatile intrinsics.
930 const Location index_;
931
932 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
933 };
934
935 // Slow path generating a read barrier for a GC root.
936 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
937 public:
938 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
939 : SlowPathCode(instruction), out_(out), root_(root) {
940 DCHECK(gUseReadBarrier);
941 }
942
943 void EmitNativeCode(CodeGenerator* codegen) override {
944 LocationSummary* locations = instruction_->GetLocations();
945 DCHECK(locations->CanCall());
946 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
947 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
948 << "Unexpected instruction in read barrier for GC root slow path: "
949 << instruction_->DebugName();
950
951 __ Bind(GetEntryLabel());
952 SaveLiveRegisters(codegen, locations);
953
954 InvokeRuntimeCallingConvention calling_convention;
955 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
956 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
957 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
958 instruction_,
959 instruction_->GetDexPc(),
960 this);
961 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
962 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
963
964 RestoreLiveRegisters(codegen, locations);
965 __ jmp(GetExitLabel());
966 }
967
968 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
969
970 private:
971 const Location out_;
972 const Location root_;
973
974 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
975 };
976
977 class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
978 public:
979 explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
980 : SlowPathCode(instruction) {}
981
982 void EmitNativeCode(CodeGenerator* codegen) override {
983 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
984 LocationSummary* locations = instruction_->GetLocations();
985 QuickEntrypointEnum entry_point =
986 (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
987 __ Bind(GetEntryLabel());
988 SaveLiveRegisters(codegen, locations);
989 if (instruction_->IsMethodExitHook()) {
990 // Load FrameSize to pass to the exit hook.
991 __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
992 }
993 x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
994 RestoreLiveRegisters(codegen, locations);
995 __ jmp(GetExitLabel());
996 }
997
998 const char* GetDescription() const override {
999 return "MethodEntryExitHooksSlowPath";
1000 }
1001
1002 private:
1003 DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
1004 };
1005
1006 class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
1007 public:
1008 CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {}
1009
1010 void EmitNativeCode(CodeGenerator* codegen) override {
1011 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
1012 __ Bind(GetEntryLabel());
1013 x86_64_codegen->GenerateInvokeRuntime(
1014 GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1015 __ jmp(GetExitLabel());
1016 }
1017
1018 const char* GetDescription() const override {
1019 return "CompileOptimizedSlowPath";
1020 }
1021
1022 private:
1023 DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
1024 };
1025
1026 #undef __
1027 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
1028 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
1029
1030 inline Condition X86_64IntegerCondition(IfCondition cond) {
1031 switch (cond) {
1032 case kCondEQ: return kEqual;
1033 case kCondNE: return kNotEqual;
1034 case kCondLT: return kLess;
1035 case kCondLE: return kLessEqual;
1036 case kCondGT: return kGreater;
1037 case kCondGE: return kGreaterEqual;
1038 case kCondB: return kBelow;
1039 case kCondBE: return kBelowEqual;
1040 case kCondA: return kAbove;
1041 case kCondAE: return kAboveEqual;
1042 }
1043 LOG(FATAL) << "Unreachable";
1044 UNREACHABLE();
1045 }
1046
1047 // Maps FP condition to x86_64 name.
1048 inline Condition X86_64FPCondition(IfCondition cond) {
1049 switch (cond) {
1050 case kCondEQ: return kEqual;
1051 case kCondNE: return kNotEqual;
1052 case kCondLT: return kBelow;
1053 case kCondLE: return kBelowEqual;
1054 case kCondGT: return kAbove;
1055 case kCondGE: return kAboveEqual;
1056 default: break; // should not happen
1057 }
1058 LOG(FATAL) << "Unreachable";
1059 UNREACHABLE();
1060 }
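// Note (illustrative): the FP mapping uses the unsigned-style condition codes because
// UCOMISS/UCOMISD report their result only through ZF/PF/CF, like an unsigned integer compare,
// so e.g. "a < b" becomes kBelow rather than kLess.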
1061
1062 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
1063 // We have to ensure that the native code we call directly (such as @CriticalNative
1064 // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
1065 // which are non-volatile for ART, but volatile for Native calls. This will ensure
1066 // that they are saved in the prologue and properly restored.
1067 for (FloatRegister fp_reg : non_volatile_xmm_regs) {
1068 locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
1069 }
1070 }
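// Background sketch (assumption spelled out rather than taken from this file): the System V
// x86-64 native ABI treats every XMM register as caller-save, while the ART managed ABI keeps
// XMM12-XMM15 callee-save (see kFpuCalleeSaves above). Listing them as temps makes the register
// allocator treat them as clobbered around the call, so they are saved and restored as the
// comment above describes.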
1071
1072 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
1073 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
1074 ArtMethod* method ATTRIBUTE_UNUSED) {
1075 return desired_dispatch_info;
1076 }
1077
1078 void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
1079 switch (load_kind) {
1080 case MethodLoadKind::kBootImageLinkTimePcRelative:
1081 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1082 __ leal(temp.AsRegister<CpuRegister>(),
1083 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1084 RecordBootImageMethodPatch(invoke);
1085 break;
1086 case MethodLoadKind::kBootImageRelRo: {
1087 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1088 __ movl(temp.AsRegister<CpuRegister>(),
1089 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1090 RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1091 break;
1092 }
1093 case MethodLoadKind::kBssEntry: {
1094 __ movq(temp.AsRegister<CpuRegister>(),
1095 Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1096 RecordMethodBssEntryPatch(invoke);
1097 // No need for memory fence, thanks to the x86-64 memory model.
1098 break;
1099 }
1100 case MethodLoadKind::kJitDirectAddress: {
1101 Load64BitValue(temp.AsRegister<CpuRegister>(),
1102 reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
1103 break;
1104 }
1105 case MethodLoadKind::kRuntimeCall: {
1106 // Test situation, don't do anything.
1107 break;
1108 }
1109 default: {
1110 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
1111 UNREACHABLE();
1112 }
1113 }
1114 }
1115
1116 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
1117 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1118 // All registers are assumed to be correctly set up.
1119
1120 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
1121 switch (invoke->GetMethodLoadKind()) {
1122 case MethodLoadKind::kStringInit: {
1123 // temp = thread->string_init_entrypoint
1124 uint32_t offset =
1125 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1126 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1127 break;
1128 }
1129 case MethodLoadKind::kRecursive: {
1130 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1131 break;
1132 }
1133 case MethodLoadKind::kRuntimeCall: {
1134 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1135 return; // No code pointer retrieval; the runtime performs the call directly.
1136 }
1137 case MethodLoadKind::kBootImageLinkTimePcRelative:
1138 // For kCallCriticalNative we skip loading the method and do the call directly.
1139 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
1140 break;
1141 }
1142 FALLTHROUGH_INTENDED;
1143 default: {
1144 LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
1145 break;
1146 }
1147 }
1148
1149 switch (invoke->GetCodePtrLocation()) {
1150 case CodePtrLocation::kCallSelf:
1151 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
1152 __ call(&frame_entry_label_);
1153 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1154 break;
1155 case CodePtrLocation::kCallCriticalNative: {
1156 size_t out_frame_size =
1157 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1158 kNativeStackAlignment,
1159 GetCriticalNativeDirectCallFrameSize>(invoke);
1160 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
1161 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1162 __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1163 RecordBootImageJniEntrypointPatch(invoke);
1164 } else {
1165 // (callee_method + offset_of_jni_entry_point)()
1166 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1167 ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1168 }
1169 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1170 // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
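// (Illustrative note: for a jboolean return, for example, the SysV native ABI only guarantees
// the low 8 bits in AL, while managed code expects a zero-extended 32-bit value, hence the
// movzxb below; the other narrow integral types are handled analogously.)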
1171 switch (invoke->GetType()) {
1172 case DataType::Type::kBool:
1173 __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1174 break;
1175 case DataType::Type::kInt8:
1176 __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1177 break;
1178 case DataType::Type::kUint16:
1179 __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1180 break;
1181 case DataType::Type::kInt16:
1182 __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1183 break;
1184 case DataType::Type::kInt32:
1185 case DataType::Type::kInt64:
1186 case DataType::Type::kFloat32:
1187 case DataType::Type::kFloat64:
1188 case DataType::Type::kVoid:
1189 break;
1190 default:
1191 DCHECK(false) << invoke->GetType();
1192 break;
1193 }
1194 if (out_frame_size != 0u) {
1195 DecreaseFrame(out_frame_size);
1196 }
1197 break;
1198 }
1199 case CodePtrLocation::kCallArtMethod:
1200 // (callee_method + offset_of_quick_compiled_code)()
1201 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1202 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1203 kX86_64PointerSize).SizeValue()));
1204 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1205 break;
1206 }
1207
1208 DCHECK(!IsLeafMethod());
1209 }
1210
1211 void CodeGeneratorX86_64::GenerateVirtualCall(
1212 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1213 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1214 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1215 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1216
1217 // Use the calling convention instead of the location of the receiver, as
1218 // intrinsics may have put the receiver in a different register. In the intrinsics
1219 // slow path, the arguments have been moved to the right place, so here we are
1220 // guaranteed that the receiver is the first register of the calling convention.
1221 InvokeDexCallingConvention calling_convention;
1222 Register receiver = calling_convention.GetRegisterAt(0);
1223
1224 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1225 // /* HeapReference<Class> */ temp = receiver->klass_
1226 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1227 MaybeRecordImplicitNullCheck(invoke);
1228 // Instead of simply (possibly) unpoisoning `temp` here, we should
1229 // emit a read barrier for the previous class reference load.
1230 // However this is not required in practice, as this is an
1231 // intermediate/temporary reference and because the current
1232 // concurrent copying collector keeps the from-space memory
1233 // intact/accessible until the end of the marking phase (the
1234 // concurrent copying collector may not do so in the future).
1235 __ MaybeUnpoisonHeapReference(temp);
1236
1237 MaybeGenerateInlineCacheCheck(invoke, temp);
1238
1239 // temp = temp->GetMethodAt(method_offset);
1240 __ movq(temp, Address(temp, method_offset));
1241 // call temp->GetEntryPoint();
1242 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1243 kX86_64PointerSize).SizeValue()));
1244 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1245 }
1246
1247 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1248 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1249 __ Bind(&boot_image_other_patches_.back().label);
1250 }
1251
1252 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1253 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1254 __ Bind(&boot_image_other_patches_.back().label);
1255 }
1256
1257 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1258 boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1259 invoke->GetResolvedMethodReference().index);
1260 __ Bind(&boot_image_method_patches_.back().label);
1261 }
1262
1263 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1264 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1265 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1266 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1267 invoke->GetMethodReference().dex_file));
1268 method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1269 invoke->GetMethodReference().index);
1270 __ Bind(&method_bss_entry_patches_.back().label);
1271 }
1272
1273 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1274 dex::TypeIndex type_index) {
1275 boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1276 __ Bind(&boot_image_type_patches_.back().label);
1277 }
1278
1279 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1280 ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1281 switch (load_class->GetLoadKind()) {
1282 case HLoadClass::LoadKind::kBssEntry:
1283 patches = &type_bss_entry_patches_;
1284 break;
1285 case HLoadClass::LoadKind::kBssEntryPublic:
1286 patches = &public_type_bss_entry_patches_;
1287 break;
1288 case HLoadClass::LoadKind::kBssEntryPackage:
1289 patches = &package_type_bss_entry_patches_;
1290 break;
1291 default:
1292 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1293 UNREACHABLE();
1294 }
1295 patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1296 return &patches->back().label;
1297 }
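// Note (high-level description, not verbatim from this file): the label returned above marks
// the instruction whose 32-bit displacement gets patched to address a .bss slot. That slot
// starts out null and is filled with the resolved class at runtime, which is why kBssEntry
// class loads are paired with a null test and a LoadClassSlowPathX86_64 at the use site.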
1298
1299 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1300 boot_image_string_patches_.emplace_back(
1301 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1302 __ Bind(&boot_image_string_patches_.back().label);
1303 }
1304
1305 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1306 string_bss_entry_patches_.emplace_back(
1307 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1308 return &string_bss_entry_patches_.back().label;
1309 }
1310
1311 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1312 boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1313 invoke->GetResolvedMethodReference().index);
1314 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1315 }
1316
1317 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
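       // Three cases follow: boot-image compilation emits a leal whose 32-bit immediate is
       // filled in at link time; PIC AOT code loads the pointer from a boot-image relocation
       // entry recorded via RecordBootImageRelRoPatch(); JIT code embeds the absolute address
       // directly, which the dchecked cast below guarantees fits in 32 bits.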
1318 if (GetCompilerOptions().IsBootImage()) {
1319 __ leal(reg,
1320 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1321 RecordBootImageIntrinsicPatch(boot_image_reference);
1322 } else if (GetCompilerOptions().GetCompilePic()) {
1323 __ movl(reg,
1324 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1325 RecordBootImageRelRoPatch(boot_image_reference);
1326 } else {
1327 DCHECK(GetCompilerOptions().IsJitCompiler());
1328 gc::Heap* heap = Runtime::Current()->GetHeap();
1329 DCHECK(!heap->GetBootImageSpaces().empty());
1330 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1331 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1332 }
1333 }
1334
1335 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1336 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1337 if (GetCompilerOptions().IsBootImage()) {
1338 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1339 __ leal(reg,
1340 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1341 MethodReference target_method = invoke->GetResolvedMethodReference();
1342 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1343 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1344 __ Bind(&boot_image_type_patches_.back().label);
1345 } else {
1346 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1347 LoadBootImageAddress(reg, boot_image_offset);
1348 }
1349 }
1350
1351 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1352 if (GetCompilerOptions().IsBootImage()) {
1353 ScopedObjectAccess soa(Thread::Current());
1354 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1355 boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1356 __ Bind(&boot_image_type_patches_.back().label);
1357 } else {
1358 uint32_t boot_image_offset = GetBootImageOffset(class_root);
1359 LoadBootImageAddress(reg, boot_image_offset);
1360 }
1361 }
1362
1363 // The label points to the end of the "movl" or another instruction, but the literal offset
1364 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1365 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1366
1367 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1368 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1369 const ArenaDeque<PatchInfo<Label>>& infos,
1370 ArenaVector<linker::LinkerPatch>* linker_patches) {
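       // Each label was bound immediately after its patched instruction, so the 32-bit
       // immediate starts four bytes earlier; the label position itself also serves as the
       // anchor PC handed to the linker patch factory.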
1371 for (const PatchInfo<Label>& info : infos) {
1372 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1373 linker_patches->push_back(
1374 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1375 }
1376 }
1377
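 // Adapter so that patch factories taking no dex file (intrinsic references and boot-image
 // relocations) can be emitted through the common EmitPcRelativeLinkerPatches() helper; the
 // unused dex file argument must be null.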
1378 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1379 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1380 const DexFile* target_dex_file,
1381 uint32_t pc_insn_offset,
1382 uint32_t boot_image_offset) {
1383 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1384 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1385 }
1386
1387 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1388 DCHECK(linker_patches->empty());
1389 size_t size =
1390 boot_image_method_patches_.size() +
1391 method_bss_entry_patches_.size() +
1392 boot_image_type_patches_.size() +
1393 type_bss_entry_patches_.size() +
1394 public_type_bss_entry_patches_.size() +
1395 package_type_bss_entry_patches_.size() +
1396 boot_image_string_patches_.size() +
1397 string_bss_entry_patches_.size() +
1398 boot_image_jni_entrypoint_patches_.size() +
1399 boot_image_other_patches_.size();
1400 linker_patches->reserve(size);
1401 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1402 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1403 boot_image_method_patches_, linker_patches);
1404 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1405 boot_image_type_patches_, linker_patches);
1406 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1407 boot_image_string_patches_, linker_patches);
1408 } else {
1409 DCHECK(boot_image_method_patches_.empty());
1410 DCHECK(boot_image_type_patches_.empty());
1411 DCHECK(boot_image_string_patches_.empty());
1412 }
1413 if (GetCompilerOptions().IsBootImage()) {
1414 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1415 boot_image_other_patches_, linker_patches);
1416 } else {
1417 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1418 boot_image_other_patches_, linker_patches);
1419 }
1420 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1421 method_bss_entry_patches_, linker_patches);
1422 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1423 type_bss_entry_patches_, linker_patches);
1424 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1425 public_type_bss_entry_patches_, linker_patches);
1426 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1427 package_type_bss_entry_patches_, linker_patches);
1428 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1429 string_bss_entry_patches_, linker_patches);
1430 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1431 boot_image_jni_entrypoint_patches_, linker_patches);
1432 DCHECK_EQ(size, linker_patches->size());
1433 }
1434
1435 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1436 stream << Register(reg);
1437 }
1438
1439 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1440 stream << FloatRegister(reg);
1441 }
1442
1443 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1444 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1445 }
1446
1447 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1448 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1449 return kX86_64WordSize;
1450 }
1451
1452 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1453 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1454 return kX86_64WordSize;
1455 }
1456
1457 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
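       // When the graph uses SIMD the full 128-bit XMM register is live and is spilled with
       // movups; otherwise only the 64-bit scalar part needs saving. Either way, the width
       // actually used is reported back through GetSlowPathFPWidth().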
1458 if (GetGraph()->HasSIMD()) {
1459 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1460 } else {
1461 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1462 }
1463 return GetSlowPathFPWidth();
1464 }
1465
1466 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1467 if (GetGraph()->HasSIMD()) {
1468 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1469 } else {
1470 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1471 }
1472 return GetSlowPathFPWidth();
1473 }
1474
1475 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1476 HInstruction* instruction,
1477 uint32_t dex_pc,
1478 SlowPathCode* slow_path) {
1479 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1480 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1481 if (EntrypointRequiresStackMap(entrypoint)) {
1482 RecordPcInfo(instruction, dex_pc, slow_path);
1483 }
1484 }
1485
1486 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1487 HInstruction* instruction,
1488 SlowPathCode* slow_path) {
1489 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1490 GenerateInvokeRuntime(entry_point_offset);
1491 }
1492
1493 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1494 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1495 }
1496
1497 namespace detail {
1498 // Mark which intrinsics we don't have handcrafted code for.
1499 template <Intrinsics T>
1500 struct IsUnimplemented {
1501 bool is_unimplemented = false;
1502 };
1503
1504 #define TRUE_OVERRIDE(Name) \
1505 template <> \
1506 struct IsUnimplemented<Intrinsics::k##Name> { \
1507 bool is_unimplemented = true; \
1508 };
1509 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1510 #undef TRUE_OVERRIDE
1511
1512 #include "intrinsics_list.h"
1513 static constexpr bool kIsIntrinsicUnimplemented[] = {
1514 false, // kNone
1515 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1516 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1517 INTRINSICS_LIST(IS_UNIMPLEMENTED)
1518 #undef IS_UNIMPLEMENTED
1519 };
1520 #undef INTRINSICS_LIST
1521
1522 } // namespace detail
1523
1524 static constexpr int kNumberOfCpuRegisterPairs = 0;
1525 // Use a fake return address register to mimic Quick.
1526 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1527 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1528 const CompilerOptions& compiler_options,
1529 OptimizingCompilerStats* stats)
1530 : CodeGenerator(graph,
1531 kNumberOfCpuRegisters,
1532 kNumberOfFloatRegisters,
1533 kNumberOfCpuRegisterPairs,
1534 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1535 arraysize(kCoreCalleeSaves))
1536 | (1 << kFakeReturnRegister),
1537 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1538 arraysize(kFpuCalleeSaves)),
1539 compiler_options,
1540 stats,
1541 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1542 block_labels_(nullptr),
1543 location_builder_(graph, this),
1544 instruction_visitor_(graph, this),
1545 move_resolver_(graph->GetAllocator(), this),
1546 assembler_(graph->GetAllocator(),
1547 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1548 constant_area_start_(0),
1549 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1550 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1551 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1552 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1553 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1554 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1555 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1556 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1557 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1558 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1559 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1560 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1561 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1562 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1563 }
1564
1565 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1566 CodeGeneratorX86_64* codegen)
1567 : InstructionCodeGenerator(graph, codegen),
1568 assembler_(codegen->GetAssembler()),
1569 codegen_(codegen) {}
1570
1571 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1572 // Stack register is always reserved.
1573 blocked_core_registers_[RSP] = true;
1574
1575 // Block the register used as TMP.
1576 blocked_core_registers_[TMP] = true;
1577 }
1578
1579 static dwarf::Reg DWARFReg(Register reg) {
1580 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1581 }
1582
1583 static dwarf::Reg DWARFReg(FloatRegister reg) {
1584 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1585 }
1586
1587 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1588 new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1589 }
1590
1591 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1592 SlowPathCode* slow_path =
1593 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1594 codegen_->AddSlowPath(slow_path);
1595
1596 if (instruction->IsMethodExitHook()) {
1597     // Check whether the caller needs a deoptimization. Strictly speaking, testing only the
1598     // CheckCallerForDeopt bit would suffice, but testing the whole flag for a non-zero value
1599     // is faster. The kCHA bit is never set in debuggable runtimes, since CHA optimization is
1600     // disabled there, and the remaining bit is set when this method itself requires a
1601     // deoptimization due to redefinition. So it is safe to simply check for a non-zero value.
1602 __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1603 Immediate(0));
1604 __ j(kNotEqual, slow_path->GetEntryLabel());
1605 }
1606
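       // Take the slow path only when the instrumentation has method entry/exit listeners
       // installed: TMP is loaded with the address of the corresponding flag inside the global
       // Instrumentation object and a single byte is compared against zero.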
1607 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1608 MemberOffset offset = instruction->IsMethodExitHook() ?
1609 instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1610 : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1611 __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1612 __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
1613 __ j(kNotEqual, slow_path->GetEntryLabel());
1614 __ Bind(slow_path->GetExitLabel());
1615 }
1616
1617 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1618 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1619 DCHECK(codegen_->RequiresCurrentMethod());
1620 GenerateMethodEntryExitHook(instruction);
1621 }
1622
1623 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1624 switch (instr->InputAt(0)->GetType()) {
1625 case DataType::Type::kReference:
1626 case DataType::Type::kBool:
1627 case DataType::Type::kUint8:
1628 case DataType::Type::kInt8:
1629 case DataType::Type::kUint16:
1630 case DataType::Type::kInt16:
1631 case DataType::Type::kInt32:
1632 case DataType::Type::kInt64:
1633 locations->SetInAt(0, Location::RegisterLocation(RAX));
1634 break;
1635
1636 case DataType::Type::kFloat32:
1637 case DataType::Type::kFloat64:
1638 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1639 break;
1640
1641 case DataType::Type::kVoid:
1642 locations->SetInAt(0, Location::NoLocation());
1643 break;
1644
1645 default:
1646 LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1647 }
1648 }
1649
1650 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1651 LocationSummary* locations = new (GetGraph()->GetAllocator())
1652 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1653 SetInForReturnValue(method_hook, locations);
1654 }
1655
1656 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1657 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1658 DCHECK(codegen_->RequiresCurrentMethod());
1659 GenerateMethodEntryExitHook(instruction);
1660 }
1661
1662 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1663 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1664 NearLabel overflow;
1665 Register method = kMethodRegisterArgument;
1666 if (!is_frame_entry) {
1667 CHECK(RequiresCurrentMethod());
1668 method = TMP;
1669 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1670 }
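         // Decrement the method's hotness counter unless it already equals kNterpHotnessValue,
         // keeping the counter saturated at that value instead of wrapping around.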
1671 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1672 Immediate(interpreter::kNterpHotnessValue));
1673 __ j(kEqual, &overflow);
1674 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1675 Immediate(-1));
1676 __ Bind(&overflow);
1677 }
1678
1679 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1680 SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64();
1681 AddSlowPath(slow_path);
1682 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1683 DCHECK(info != nullptr);
1684 CHECK(!HasEmptyFrame());
1685 uint64_t address = reinterpret_cast64<uint64_t>(info);
1686     // Note: if the address were in the 32-bit range, we could use
1687     // Address::Absolute and avoid this movq.
1688 __ movq(CpuRegister(TMP), Immediate(address));
1689     // With multiple threads, the decrement below can race and overflow; that is OK because
1690     // we will eventually observe the counter reaching 0. Also, at this point we have no
1691     // register available to inspect the counter directly.
1692 __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1693 Immediate(-1));
1694 __ j(kEqual, slow_path->GetEntryLabel());
1695 __ Bind(slow_path->GetExitLabel());
1696 }
1697 }
1698
1699 void CodeGeneratorX86_64::GenerateFrameEntry() {
1700 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1701
1702 // Check if we need to generate the clinit check. We will jump to the
1703 // resolution stub if the class is not initialized and the executing thread is
1704 // not the thread initializing it.
1705 // We do this before constructing the frame to get the correct stack trace if
1706 // an exception is thrown.
1707 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1708 NearLabel resolution;
1709 // Check if we're visibly initialized.
1710
1711 // We don't emit a read barrier here to save on code size. We rely on the
1712 // resolution trampoline to do a suspend check before re-entering this code.
1713 __ movl(CpuRegister(TMP),
1714 Address(CpuRegister(kMethodRegisterArgument),
1715 ArtMethod::DeclaringClassOffset().Int32Value()));
1716 __ cmpb(Address(CpuRegister(TMP), status_byte_offset),
1717 Immediate(shifted_visibly_initialized_value));
1718 __ j(kAboveEqual, &frame_entry_label_);
1719
1720 // Check if we're initializing and the thread initializing is the one
1721 // executing the code.
1722 __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value));
1723 __ j(kBelow, &resolution);
1724
1725 __ movl(CpuRegister(TMP),
1726 Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1727 __ gs()->cmpl(
1728 CpuRegister(TMP),
1729 Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1730 __ j(kEqual, &frame_entry_label_);
1731 __ Bind(&resolution);
1732
1733 // Jump to the resolution stub.
1734 ThreadOffset64 entrypoint_offset =
1735 GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1736 __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1737 }
1738
1739 __ Bind(&frame_entry_label_);
1740 bool skip_overflow_check = IsLeafMethod()
1741 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1742 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1743
1744
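       // Implicit stack-overflow check: probe the page GetStackOverflowReservedBytes() below
       // RSP with a load. If the stack cannot grow that far the access faults, and the
       // runtime's fault handler is expected to turn the fault into a StackOverflowError,
       // which is why PC info is recorded for the probe.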
1745 if (!skip_overflow_check) {
1746 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1747 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1748 RecordPcInfo(nullptr, 0);
1749 }
1750
1751 if (!HasEmptyFrame()) {
1752 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1753 Register reg = kCoreCalleeSaves[i];
1754 if (allocated_registers_.ContainsCoreRegister(reg)) {
1755 __ pushq(CpuRegister(reg));
1756 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1757 __ cfi().RelOffset(DWARFReg(reg), 0);
1758 }
1759 }
1760
1761 int adjust = GetFrameSize() - GetCoreSpillSize();
1762 IncreaseFrame(adjust);
1763 uint32_t xmm_spill_location = GetFpuSpillStart();
1764 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1765
1766 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1767 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1768 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1769 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1770 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1771 }
1772 }
1773
1774 // Save the current method if we need it. Note that we do not
1775 // do this in HCurrentMethod, as the instruction might have been removed
1776 // in the SSA graph.
1777 if (RequiresCurrentMethod()) {
1778 CHECK(!HasEmptyFrame());
1779 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1780 CpuRegister(kMethodRegisterArgument));
1781 }
1782
1783 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1784 CHECK(!HasEmptyFrame());
1785 // Initialize should_deoptimize flag to 0.
1786 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1787 }
1788 }
1789
1790 MaybeIncrementHotness(/* is_frame_entry= */ true);
1791 }
1792
1793 void CodeGeneratorX86_64::GenerateFrameExit() {
1794 __ cfi().RememberState();
1795 if (!HasEmptyFrame()) {
1796 uint32_t xmm_spill_location = GetFpuSpillStart();
1797 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1798 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1799 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1800 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1801 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1802 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1803 }
1804 }
1805
1806 int adjust = GetFrameSize() - GetCoreSpillSize();
1807 DecreaseFrame(adjust);
1808
1809 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1810 Register reg = kCoreCalleeSaves[i];
1811 if (allocated_registers_.ContainsCoreRegister(reg)) {
1812 __ popq(CpuRegister(reg));
1813 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1814 __ cfi().Restore(DWARFReg(reg));
1815 }
1816 }
1817 }
1818 __ ret();
1819 __ cfi().RestoreState();
1820 __ cfi().DefCFAOffset(GetFrameSize());
1821 }
1822
1823 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1824 __ Bind(GetLabelOf(block));
1825 }
1826
1827 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1828 if (source.Equals(destination)) {
1829 return;
1830 }
1831 if (destination.IsRegister()) {
1832 CpuRegister dest = destination.AsRegister<CpuRegister>();
1833 if (source.IsRegister()) {
1834 __ movq(dest, source.AsRegister<CpuRegister>());
1835 } else if (source.IsFpuRegister()) {
1836 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1837 } else if (source.IsStackSlot()) {
1838 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1839 } else if (source.IsConstant()) {
1840 HConstant* constant = source.GetConstant();
1841 if (constant->IsLongConstant()) {
1842 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1843 } else if (constant->IsDoubleConstant()) {
1844 Load64BitValue(dest, GetInt64ValueOf(constant));
1845 } else {
1846 Load32BitValue(dest, GetInt32ValueOf(constant));
1847 }
1848 } else {
1849 DCHECK(source.IsDoubleStackSlot());
1850 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1851 }
1852 } else if (destination.IsFpuRegister()) {
1853 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1854 if (source.IsRegister()) {
1855 __ movd(dest, source.AsRegister<CpuRegister>());
1856 } else if (source.IsFpuRegister()) {
1857 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1858 } else if (source.IsConstant()) {
1859 HConstant* constant = source.GetConstant();
1860 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1861 if (constant->IsFloatConstant()) {
1862 Load32BitValue(dest, static_cast<int32_t>(value));
1863 } else {
1864 Load64BitValue(dest, value);
1865 }
1866 } else if (source.IsStackSlot()) {
1867 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1868 } else {
1869 DCHECK(source.IsDoubleStackSlot());
1870 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1871 }
1872 } else if (destination.IsStackSlot()) {
1873 if (source.IsRegister()) {
1874 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1875 source.AsRegister<CpuRegister>());
1876 } else if (source.IsFpuRegister()) {
1877 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1878 source.AsFpuRegister<XmmRegister>());
1879 } else if (source.IsConstant()) {
1880 HConstant* constant = source.GetConstant();
1881 int32_t value = GetInt32ValueOf(constant);
1882 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1883 } else {
1884 DCHECK(source.IsStackSlot()) << source;
1885 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1886 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1887 }
1888 } else {
1889 DCHECK(destination.IsDoubleStackSlot());
1890 if (source.IsRegister()) {
1891 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1892 source.AsRegister<CpuRegister>());
1893 } else if (source.IsFpuRegister()) {
1894 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1895 source.AsFpuRegister<XmmRegister>());
1896 } else if (source.IsConstant()) {
1897 HConstant* constant = source.GetConstant();
1898 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1899 int64_t value = GetInt64ValueOf(constant);
1900 Store64BitValueToStack(destination, value);
1901 } else {
1902 DCHECK(source.IsDoubleStackSlot());
1903 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1904 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1905 }
1906 }
1907 }
1908
1909 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
1910 Location dst,
1911 Address src) {
1912 switch (type) {
1913 case DataType::Type::kBool:
1914 case DataType::Type::kUint8:
1915 __ movzxb(dst.AsRegister<CpuRegister>(), src);
1916 break;
1917 case DataType::Type::kInt8:
1918 __ movsxb(dst.AsRegister<CpuRegister>(), src);
1919 break;
1920 case DataType::Type::kUint16:
1921 __ movzxw(dst.AsRegister<CpuRegister>(), src);
1922 break;
1923 case DataType::Type::kInt16:
1924 __ movsxw(dst.AsRegister<CpuRegister>(), src);
1925 break;
1926 case DataType::Type::kInt32:
1927 case DataType::Type::kUint32:
1928 __ movl(dst.AsRegister<CpuRegister>(), src);
1929 break;
1930 case DataType::Type::kInt64:
1931 case DataType::Type::kUint64:
1932 __ movq(dst.AsRegister<CpuRegister>(), src);
1933 break;
1934 case DataType::Type::kFloat32:
1935 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1936 break;
1937 case DataType::Type::kFloat64:
1938 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1939 break;
1940 case DataType::Type::kVoid:
1941 case DataType::Type::kReference:
1942 LOG(FATAL) << "Unreachable type " << type;
1943 UNREACHABLE();
1944 }
1945 }
1946
1947 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1948 DCHECK(location.IsRegister());
1949 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1950 }
1951
1952 void CodeGeneratorX86_64::MoveLocation(
1953 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1954 Move(dst, src);
1955 }
1956
1957 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1958 if (location.IsRegister()) {
1959 locations->AddTemp(location);
1960 } else {
1961 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1962 }
1963 }
1964
1965 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
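       // A goto whose successor is the exit block follows an always-throwing instruction and
       // needs no code. A loop back edge updates the hotness counter and emits the suspend
       // check; otherwise we fall through to the successor or jump to its label.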
1966 if (successor->IsExitBlock()) {
1967 DCHECK(got->GetPrevious()->AlwaysThrows());
1968 return; // no code needed
1969 }
1970
1971 HBasicBlock* block = got->GetBlock();
1972 HInstruction* previous = got->GetPrevious();
1973
1974 HLoopInformation* info = block->GetLoopInformation();
1975 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1976 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1977 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1978 return;
1979 }
1980
1981 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1982 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1983 }
1984 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1985 __ jmp(codegen_->GetLabelOf(successor));
1986 }
1987 }
1988
1989 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1990 got->SetLocations(nullptr);
1991 }
1992
1993 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1994 HandleGoto(got, got->GetSuccessor());
1995 }
1996
1997 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1998 try_boundary->SetLocations(nullptr);
1999 }
2000
2001 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2002 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2003 if (!successor->IsExitBlock()) {
2004 HandleGoto(try_boundary, successor);
2005 }
2006 }
2007
2008 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2009 exit->SetLocations(nullptr);
2010 }
2011
2012 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
2013 }
2014
2015 template<class LabelType>
2016 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2017 LabelType* true_label,
2018 LabelType* false_label) {
2019 if (cond->IsFPConditionTrueIfNaN()) {
2020 __ j(kUnordered, true_label);
2021 } else if (cond->IsFPConditionFalseIfNaN()) {
2022 __ j(kUnordered, false_label);
2023 }
2024 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2025 }
2026
2027 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2028 LocationSummary* locations = condition->GetLocations();
2029
2030 Location left = locations->InAt(0);
2031 Location right = locations->InAt(1);
2032 DataType::Type type = condition->InputAt(0)->GetType();
2033 switch (type) {
2034 case DataType::Type::kBool:
2035 case DataType::Type::kUint8:
2036 case DataType::Type::kInt8:
2037 case DataType::Type::kUint16:
2038 case DataType::Type::kInt16:
2039 case DataType::Type::kInt32:
2040 case DataType::Type::kReference: {
2041 codegen_->GenerateIntCompare(left, right);
2042 break;
2043 }
2044 case DataType::Type::kInt64: {
2045 codegen_->GenerateLongCompare(left, right);
2046 break;
2047 }
2048 case DataType::Type::kFloat32: {
2049 if (right.IsFpuRegister()) {
2050 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2051 } else if (right.IsConstant()) {
2052 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2053 codegen_->LiteralFloatAddress(
2054 right.GetConstant()->AsFloatConstant()->GetValue()));
2055 } else {
2056 DCHECK(right.IsStackSlot());
2057 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2058 Address(CpuRegister(RSP), right.GetStackIndex()));
2059 }
2060 break;
2061 }
2062 case DataType::Type::kFloat64: {
2063 if (right.IsFpuRegister()) {
2064 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2065 } else if (right.IsConstant()) {
2066 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2067 codegen_->LiteralDoubleAddress(
2068 right.GetConstant()->AsDoubleConstant()->GetValue()));
2069 } else {
2070 DCHECK(right.IsDoubleStackSlot());
2071 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2072 Address(CpuRegister(RSP), right.GetStackIndex()));
2073 }
2074 break;
2075 }
2076 default:
2077 LOG(FATAL) << "Unexpected condition type " << type;
2078 }
2079 }
2080
2081 template<class LabelType>
2082 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2083 LabelType* true_target_in,
2084 LabelType* false_target_in) {
2085   // Generated branching requires both targets to be explicit. If either of the
2086   // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
2087 LabelType fallthrough_target;
2088 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2089 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2090
2091 // Generate the comparison to set the CC.
2092 GenerateCompareTest(condition);
2093
2094 // Now generate the correct jump(s).
2095 DataType::Type type = condition->InputAt(0)->GetType();
2096 switch (type) {
2097 case DataType::Type::kInt64: {
2098 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2099 break;
2100 }
2101 case DataType::Type::kFloat32: {
2102 GenerateFPJumps(condition, true_target, false_target);
2103 break;
2104 }
2105 case DataType::Type::kFloat64: {
2106 GenerateFPJumps(condition, true_target, false_target);
2107 break;
2108 }
2109 default:
2110 LOG(FATAL) << "Unexpected condition type " << type;
2111 }
2112
2113 if (false_target != &fallthrough_target) {
2114 __ jmp(false_target);
2115 }
2116
2117 if (fallthrough_target.IsLinked()) {
2118 __ Bind(&fallthrough_target);
2119 }
2120 }
2121
2122 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
2123   // Moves may clobber the eflags register (moving zero uses xorl), so the eflags are only
2124   // valid if `cond` is emitted immediately before `branch`. We also cannot use the eflags
2125   // for long conditions if they are materialized, due to the complex branching.
2126 return cond->IsCondition() &&
2127 cond->GetNext() == branch &&
2128 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
2129 }
2130
2131 template<class LabelType>
2132 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2133 size_t condition_input_index,
2134 LabelType* true_target,
2135 LabelType* false_target) {
2136 HInstruction* cond = instruction->InputAt(condition_input_index);
2137
2138 if (true_target == nullptr && false_target == nullptr) {
2139 // Nothing to do. The code always falls through.
2140 return;
2141 } else if (cond->IsIntConstant()) {
2142 // Constant condition, statically compared against "true" (integer value 1).
2143 if (cond->AsIntConstant()->IsTrue()) {
2144 if (true_target != nullptr) {
2145 __ jmp(true_target);
2146 }
2147 } else {
2148 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2149 if (false_target != nullptr) {
2150 __ jmp(false_target);
2151 }
2152 }
2153 return;
2154 }
2155
2156 // The following code generates these patterns:
2157 // (1) true_target == nullptr && false_target != nullptr
2158 // - opposite condition true => branch to false_target
2159 // (2) true_target != nullptr && false_target == nullptr
2160 // - condition true => branch to true_target
2161 // (3) true_target != nullptr && false_target != nullptr
2162 // - condition true => branch to true_target
2163 // - branch to false_target
2164 if (IsBooleanValueOrMaterializedCondition(cond)) {
2165 if (AreEflagsSetFrom(cond, instruction)) {
2166 if (true_target == nullptr) {
2167 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2168 } else {
2169 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2170 }
2171 } else {
2172 // Materialized condition, compare against 0.
2173 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2174 if (lhs.IsRegister()) {
2175 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2176 } else {
2177 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2178 }
2179 if (true_target == nullptr) {
2180 __ j(kEqual, false_target);
2181 } else {
2182 __ j(kNotEqual, true_target);
2183 }
2184 }
2185 } else {
2186 // Condition has not been materialized, use its inputs as the
2187 // comparison and its condition as the branch condition.
2188 HCondition* condition = cond->AsCondition();
2189
2190 // If this is a long or FP comparison that has been folded into
2191 // the HCondition, generate the comparison directly.
2192 DataType::Type type = condition->InputAt(0)->GetType();
2193 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2194 GenerateCompareTestAndBranch(condition, true_target, false_target);
2195 return;
2196 }
2197
2198 Location lhs = condition->GetLocations()->InAt(0);
2199 Location rhs = condition->GetLocations()->InAt(1);
2200 codegen_->GenerateIntCompare(lhs, rhs);
2201 if (true_target == nullptr) {
2202 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2203 } else {
2204 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2205 }
2206 }
2207
2208 // If neither branch falls through (case 3), the conditional branch to `true_target`
2209 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2210 if (true_target != nullptr && false_target != nullptr) {
2211 __ jmp(false_target);
2212 }
2213 }
2214
2215 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2216 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2217 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2218 locations->SetInAt(0, Location::Any());
2219 }
2220 }
2221
2222 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2223 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2224 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2225 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2226 nullptr : codegen_->GetLabelOf(true_successor);
2227 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2228 nullptr : codegen_->GetLabelOf(false_successor);
2229 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2230 }
2231
2232 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2233 LocationSummary* locations = new (GetGraph()->GetAllocator())
2234 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2235 InvokeRuntimeCallingConvention calling_convention;
2236 RegisterSet caller_saves = RegisterSet::Empty();
2237 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2238 locations->SetCustomSlowPathCallerSaves(caller_saves);
2239 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2240 locations->SetInAt(0, Location::Any());
2241 }
2242 }
2243
2244 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2245 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2246 GenerateTestAndBranch<Label>(deoptimize,
2247 /* condition_input_index= */ 0,
2248 slow_path->GetEntryLabel(),
2249 /* false_target= */ nullptr);
2250 }
2251
2252 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2253 LocationSummary* locations = new (GetGraph()->GetAllocator())
2254 LocationSummary(flag, LocationSummary::kNoCall);
2255 locations->SetOut(Location::RequiresRegister());
2256 }
2257
2258 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2259 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2260 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2261 }
2262
2263 static bool SelectCanUseCMOV(HSelect* select) {
2264 // There are no conditional move instructions for XMMs.
2265 if (DataType::IsFloatingPointType(select->GetType())) {
2266 return false;
2267 }
2268
2269   // An FP condition doesn't generate the single CC that we need.
2270 HInstruction* condition = select->GetCondition();
2271 if (condition->IsCondition() &&
2272 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2273 return false;
2274 }
2275
2276 // We can generate a CMOV for this Select.
2277 return true;
2278 }
2279
2280 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2281 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2282 if (DataType::IsFloatingPointType(select->GetType())) {
2283 locations->SetInAt(0, Location::RequiresFpuRegister());
2284 locations->SetInAt(1, Location::Any());
2285 } else {
2286 locations->SetInAt(0, Location::RequiresRegister());
2287 if (SelectCanUseCMOV(select)) {
2288 if (select->InputAt(1)->IsConstant()) {
2289 locations->SetInAt(1, Location::RequiresRegister());
2290 } else {
2291 locations->SetInAt(1, Location::Any());
2292 }
2293 } else {
2294 locations->SetInAt(1, Location::Any());
2295 }
2296 }
2297 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2298 locations->SetInAt(2, Location::RequiresRegister());
2299 }
2300 locations->SetOut(Location::SameAsFirstInput());
2301 }
2302
2303 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2304 LocationSummary* locations = select->GetLocations();
2305 if (SelectCanUseCMOV(select)) {
2306 // If both the condition and the source types are integer, we can generate
2307 // a CMOV to implement Select.
2308 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2309 Location value_true_loc = locations->InAt(1);
2310 DCHECK(locations->InAt(0).Equals(locations->Out()));
2311
2312 HInstruction* select_condition = select->GetCondition();
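         // Default to "not equal to zero", which is how a plain boolean condition (or a
         // rematerialized condition register) is tested below.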
2313 Condition cond = kNotEqual;
2314
2315 // Figure out how to test the 'condition'.
2316 if (select_condition->IsCondition()) {
2317 HCondition* condition = select_condition->AsCondition();
2318 if (!condition->IsEmittedAtUseSite()) {
2319 // This was a previously materialized condition.
2320 // Can we use the existing condition code?
2321 if (AreEflagsSetFrom(condition, select)) {
2322 // Materialization was the previous instruction. Condition codes are right.
2323 cond = X86_64IntegerCondition(condition->GetCondition());
2324 } else {
2325 // No, we have to recreate the condition code.
2326 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2327 __ testl(cond_reg, cond_reg);
2328 }
2329 } else {
2330 GenerateCompareTest(condition);
2331 cond = X86_64IntegerCondition(condition->GetCondition());
2332 }
2333 } else {
2334 // Must be a Boolean condition, which needs to be compared to 0.
2335 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2336 __ testl(cond_reg, cond_reg);
2337 }
2338
2339 // If the condition is true, overwrite the output, which already contains false.
2340 // Generate the correct sized CMOV.
2341 bool is_64_bit = DataType::Is64BitType(select->GetType());
2342 if (value_true_loc.IsRegister()) {
2343 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2344 } else {
2345 __ cmov(cond,
2346 value_false,
2347 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2348 }
2349 } else {
2350 NearLabel false_target;
2351 GenerateTestAndBranch<NearLabel>(select,
2352 /* condition_input_index= */ 2,
2353 /* true_target= */ nullptr,
2354 &false_target);
2355 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2356 __ Bind(&false_target);
2357 }
2358 }
2359
2360 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2361 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2362 }
2363
2364 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2365 // The environment recording already happened in CodeGenerator::Compile.
2366 }
2367
2368 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2369 __ subq(CpuRegister(RSP), Immediate(adjustment));
2370 __ cfi().AdjustCFAOffset(adjustment);
2371 }
2372
2373 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2374 __ addq(CpuRegister(RSP), Immediate(adjustment));
2375 __ cfi().AdjustCFAOffset(-adjustment);
2376 }
2377
2378 void CodeGeneratorX86_64::GenerateNop() {
2379 __ nop();
2380 }
2381
2382 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2383 LocationSummary* locations =
2384 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2385 // Handle the long/FP comparisons made in instruction simplification.
2386 switch (cond->InputAt(0)->GetType()) {
2387 case DataType::Type::kInt64:
2388 locations->SetInAt(0, Location::RequiresRegister());
2389 locations->SetInAt(1, Location::Any());
2390 break;
2391 case DataType::Type::kFloat32:
2392 case DataType::Type::kFloat64:
2393 locations->SetInAt(0, Location::RequiresFpuRegister());
2394 locations->SetInAt(1, Location::Any());
2395 break;
2396 default:
2397 locations->SetInAt(0, Location::RequiresRegister());
2398 locations->SetInAt(1, Location::Any());
2399 break;
2400 }
2401 if (!cond->IsEmittedAtUseSite()) {
2402 locations->SetOut(Location::RequiresRegister());
2403 }
2404 }
2405
2406 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2407 if (cond->IsEmittedAtUseSite()) {
2408 return;
2409 }
2410
2411 LocationSummary* locations = cond->GetLocations();
2412 Location lhs = locations->InAt(0);
2413 Location rhs = locations->InAt(1);
2414 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2415 NearLabel true_label, false_label;
2416
2417 switch (cond->InputAt(0)->GetType()) {
2418 default:
2419 // Integer case.
2420
2421 // Clear output register: setcc only sets the low byte.
2422 __ xorl(reg, reg);
2423
2424 codegen_->GenerateIntCompare(lhs, rhs);
2425 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2426 return;
2427 case DataType::Type::kInt64:
2428 // Clear output register: setcc only sets the low byte.
2429 __ xorl(reg, reg);
2430
2431 codegen_->GenerateLongCompare(lhs, rhs);
2432 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2433 return;
2434 case DataType::Type::kFloat32: {
2435 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2436 if (rhs.IsConstant()) {
2437 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2438 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2439 } else if (rhs.IsStackSlot()) {
2440 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2441 } else {
2442 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2443 }
2444 GenerateFPJumps(cond, &true_label, &false_label);
2445 break;
2446 }
2447 case DataType::Type::kFloat64: {
2448 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2449 if (rhs.IsConstant()) {
2450 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2451 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2452 } else if (rhs.IsDoubleStackSlot()) {
2453 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2454 } else {
2455 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2456 }
2457 GenerateFPJumps(cond, &true_label, &false_label);
2458 break;
2459 }
2460 }
2461
2462 // Convert the jumps into the result.
2463 NearLabel done_label;
2464
2465 // False case: result = 0.
2466 __ Bind(&false_label);
2467 __ xorl(reg, reg);
2468 __ jmp(&done_label);
2469
2470 // True case: result = 1.
2471 __ Bind(&true_label);
2472 __ movl(reg, Immediate(1));
2473 __ Bind(&done_label);
2474 }
2475
2476 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2477 HandleCondition(comp);
2478 }
2479
2480 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2481 HandleCondition(comp);
2482 }
2483
2484 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2485 HandleCondition(comp);
2486 }
2487
2488 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2489 HandleCondition(comp);
2490 }
2491
2492 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2493 HandleCondition(comp);
2494 }
2495
2496 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2497 HandleCondition(comp);
2498 }
2499
2500 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2501 HandleCondition(comp);
2502 }
2503
2504 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2505 HandleCondition(comp);
2506 }
2507
2508 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2509 HandleCondition(comp);
2510 }
2511
2512 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2513 HandleCondition(comp);
2514 }
2515
2516 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2517 HandleCondition(comp);
2518 }
2519
2520 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2521 HandleCondition(comp);
2522 }
2523
2524 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2525 HandleCondition(comp);
2526 }
2527
2528 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2529 HandleCondition(comp);
2530 }
2531
2532 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2533 HandleCondition(comp);
2534 }
2535
2536 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2537 HandleCondition(comp);
2538 }
2539
2540 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2541 HandleCondition(comp);
2542 }
2543
2544 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2545 HandleCondition(comp);
2546 }
2547
2548 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2549 HandleCondition(comp);
2550 }
2551
2552 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2553 HandleCondition(comp);
2554 }
2555
2556 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2557 LocationSummary* locations =
2558 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2559 switch (compare->InputAt(0)->GetType()) {
2560 case DataType::Type::kBool:
2561 case DataType::Type::kUint8:
2562 case DataType::Type::kInt8:
2563 case DataType::Type::kUint16:
2564 case DataType::Type::kInt16:
2565 case DataType::Type::kInt32:
2566 case DataType::Type::kInt64: {
2567 locations->SetInAt(0, Location::RequiresRegister());
2568 locations->SetInAt(1, Location::Any());
2569 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2570 break;
2571 }
2572 case DataType::Type::kFloat32:
2573 case DataType::Type::kFloat64: {
2574 locations->SetInAt(0, Location::RequiresFpuRegister());
2575 locations->SetInAt(1, Location::Any());
2576 locations->SetOut(Location::RequiresRegister());
2577 break;
2578 }
2579 default:
2580 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2581 }
2582 }
2583
2584 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2585 LocationSummary* locations = compare->GetLocations();
2586 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2587 Location left = locations->InAt(0);
2588 Location right = locations->InAt(1);
2589
2590 NearLabel less, greater, done;
2591 DataType::Type type = compare->InputAt(0)->GetType();
2592 Condition less_cond = kLess;
2593
2594 switch (type) {
2595 case DataType::Type::kBool:
2596 case DataType::Type::kUint8:
2597 case DataType::Type::kInt8:
2598 case DataType::Type::kUint16:
2599 case DataType::Type::kInt16:
2600 case DataType::Type::kInt32: {
2601 codegen_->GenerateIntCompare(left, right);
2602 break;
2603 }
2604 case DataType::Type::kInt64: {
2605 codegen_->GenerateLongCompare(left, right);
2606 break;
2607 }
2608 case DataType::Type::kFloat32: {
2609 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2610 if (right.IsConstant()) {
2611 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2612 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2613 } else if (right.IsStackSlot()) {
2614 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2615 } else {
2616 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2617 }
2618 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2619 less_cond = kBelow; // ucomis{s,d} sets CF
2620 break;
2621 }
2622 case DataType::Type::kFloat64: {
2623 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2624 if (right.IsConstant()) {
2625 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2626 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2627 } else if (right.IsDoubleStackSlot()) {
2628 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2629 } else {
2630 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2631 }
2632 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2633 less_cond = kBelow; // ucomis{s,d} sets CF
2634 break;
2635 }
2636 default:
2637 LOG(FATAL) << "Unexpected compare type " << type;
2638 }
2639
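// The flags computed above are materialized into {-1, 0, +1} by the sequence below:
// start with 0, jump to done on equal, jump to less on less_cond, otherwise fall
// through to greater. For FP inputs less_cond is kBelow because ucomiss/ucomisd
// report "less than" via the carry flag, and the unordered (NaN) case has already
// been routed to greater or less above according to the compare's bias.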
2640 __ movl(out, Immediate(0));
2641 __ j(kEqual, &done);
2642 __ j(less_cond, &less);
2643
2644 __ Bind(&greater);
2645 __ movl(out, Immediate(1));
2646 __ jmp(&done);
2647
2648 __ Bind(&less);
2649 __ movl(out, Immediate(-1));
2650
2651 __ Bind(&done);
2652 }
2653
2654 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2655 LocationSummary* locations =
2656 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2657 locations->SetOut(Location::ConstantLocation(constant));
2658 }
2659
2660 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2661 // Will be generated at use site.
2662 }
2663
2664 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2665 LocationSummary* locations =
2666 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2667 locations->SetOut(Location::ConstantLocation(constant));
2668 }
2669
2670 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2671 // Will be generated at use site.
2672 }
2673
2674 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2675 LocationSummary* locations =
2676 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2677 locations->SetOut(Location::ConstantLocation(constant));
2678 }
2679
2680 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2681 // Will be generated at use site.
2682 }
2683
2684 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2685 LocationSummary* locations =
2686 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2687 locations->SetOut(Location::ConstantLocation(constant));
2688 }
2689
2690 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2691 // Will be generated at use site.
2692 }
2693
2694 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2695 LocationSummary* locations =
2696 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2697 locations->SetOut(Location::ConstantLocation(constant));
2698 }
2699
2700 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2701 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2702 // Will be generated at use site.
2703 }
2704
2705 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2706 constructor_fence->SetLocations(nullptr);
2707 }
2708
2709 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2710 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2711 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2712 }
2713
2714 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2715 memory_barrier->SetLocations(nullptr);
2716 }
2717
2718 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2719 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2720 }
2721
2722 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2723 ret->SetLocations(nullptr);
2724 }
2725
2726 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2727 codegen_->GenerateFrameExit();
2728 }
2729
2730 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2731 LocationSummary* locations =
2732 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2733 SetInForReturnValue(ret, locations);
2734 }
2735
2736 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2737 switch (ret->InputAt(0)->GetType()) {
2738 case DataType::Type::kReference:
2739 case DataType::Type::kBool:
2740 case DataType::Type::kUint8:
2741 case DataType::Type::kInt8:
2742 case DataType::Type::kUint16:
2743 case DataType::Type::kInt16:
2744 case DataType::Type::kInt32:
2745 case DataType::Type::kInt64:
2746 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2747 break;
2748
2749 case DataType::Type::kFloat32: {
2750 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2751 XMM0);
2752 // To simplify callers of an OSR method, we put the return value in both
2753 // a floating-point and a core register.
2754 if (GetGraph()->IsCompilingOsr()) {
2755 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2756 }
2757 break;
2758 }
2759 case DataType::Type::kFloat64: {
2760 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2761 XMM0);
2762 // To simplify callers of an OSR method, we put the return value in both
2763 // a floating-point and a core register.
2764 if (GetGraph()->IsCompilingOsr()) {
2765 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2766 }
2767 break;
2768 }
2769
2770 default:
2771 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2772 }
2773 codegen_->GenerateFrameExit();
2774 }
2775
2776 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2777 switch (type) {
2778 case DataType::Type::kReference:
2779 case DataType::Type::kBool:
2780 case DataType::Type::kUint8:
2781 case DataType::Type::kInt8:
2782 case DataType::Type::kUint16:
2783 case DataType::Type::kInt16:
2784 case DataType::Type::kUint32:
2785 case DataType::Type::kInt32:
2786 case DataType::Type::kUint64:
2787 case DataType::Type::kInt64:
2788 return Location::RegisterLocation(RAX);
2789
2790 case DataType::Type::kVoid:
2791 return Location::NoLocation();
2792
2793 case DataType::Type::kFloat64:
2794 case DataType::Type::kFloat32:
2795 return Location::FpuRegisterLocation(XMM0);
2796 }
2797
2798 UNREACHABLE();
2799 }
2800
2801 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2802 return Location::RegisterLocation(kMethodRegisterArgument);
2803 }
2804
2805 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
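// Managed (dex) calling convention: each argument takes the next free argument
// register of its kind, and stack_index_ advances for every argument (one slot for
// 32-bit values, two for 64-bit ones) so that arguments which do not fit in
// registers land at fixed, caller-visible stack offsets.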
2806 switch (type) {
2807 case DataType::Type::kReference:
2808 case DataType::Type::kBool:
2809 case DataType::Type::kUint8:
2810 case DataType::Type::kInt8:
2811 case DataType::Type::kUint16:
2812 case DataType::Type::kInt16:
2813 case DataType::Type::kInt32: {
2814 uint32_t index = gp_index_++;
2815 stack_index_++;
2816 if (index < calling_convention.GetNumberOfRegisters()) {
2817 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2818 } else {
2819 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2820 }
2821 }
2822
2823 case DataType::Type::kInt64: {
2824 uint32_t index = gp_index_;
2825 stack_index_ += 2;
2826 if (index < calling_convention.GetNumberOfRegisters()) {
2827 gp_index_ += 1;
2828 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2829 } else {
2830 gp_index_ += 2;
2831 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2832 }
2833 }
2834
2835 case DataType::Type::kFloat32: {
2836 uint32_t index = float_index_++;
2837 stack_index_++;
2838 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2839 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2840 } else {
2841 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2842 }
2843 }
2844
2845 case DataType::Type::kFloat64: {
2846 uint32_t index = float_index_++;
2847 stack_index_ += 2;
2848 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2849 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2850 } else {
2851 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2852 }
2853 }
2854
2855 case DataType::Type::kUint32:
2856 case DataType::Type::kUint64:
2857 case DataType::Type::kVoid:
2858 LOG(FATAL) << "Unexpected parameter type " << type;
2859 UNREACHABLE();
2860 }
2861 return Location::NoLocation();
2862 }
2863
2864 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2865 DCHECK_NE(type, DataType::Type::kReference);
2866
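// @CriticalNative calls receive no JNIEnv* and no jclass, so managed arguments map
// straight onto the native System V AMD64 convention: up to six GPRs and eight XMM
// registers, then 8-byte stack slots. RDI, normally the method register, is
// available as an ordinary argument register here because the ArtMethod* is passed
// as a hidden argument in RAX (see GetMethodLocation below).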
2867 Location location = Location::NoLocation();
2868 if (DataType::IsFloatingPointType(type)) {
2869 if (fpr_index_ < kParameterFloatRegistersLength) {
2870 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2871 ++fpr_index_;
2872 }
2873 } else {
2874 // Native ABI uses the same registers as managed, except that the method register RDI
2875 // is a normal argument.
2876 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2877 location = Location::RegisterLocation(
2878 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2879 ++gpr_index_;
2880 }
2881 }
2882 if (location.IsInvalid()) {
2883 if (DataType::Is64BitType(type)) {
2884 location = Location::DoubleStackSlot(stack_offset_);
2885 } else {
2886 location = Location::StackSlot(stack_offset_);
2887 }
2888 stack_offset_ += kFramePointerSize;
2889
2890 if (for_register_allocation_) {
2891 location = Location::Any();
2892 }
2893 }
2894 return location;
2895 }
2896
2897 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2898 const {
2899 // We perform conversion to the managed ABI return register after the call if needed.
2900 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2901 return dex_calling_convention.GetReturnLocation(type);
2902 }
2903
2904 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2905 // Pass the method in the hidden argument RAX.
2906 return Location::RegisterLocation(RAX);
2907 }
2908
2909 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2910 // The trampoline uses the same calling convention as the dex calling convention,
2911 // except that instead of loading arg0 with the target Method*, arg0 will contain
2912 // the method_idx.
2913 HandleInvoke(invoke);
2914 }
2915
2916 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2917 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2918 }
2919
2920 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2921 // Explicit clinit checks triggered by static invokes must have been pruned by
2922 // art::PrepareForRegisterAllocation.
2923 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2924
2925 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2926 if (intrinsic.TryDispatch(invoke)) {
2927 return;
2928 }
2929
2930 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2931 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2932 /*for_register_allocation=*/ true);
2933 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2934 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2935 } else {
2936 HandleInvoke(invoke);
2937 }
2938 }
2939
2940 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2941 if (invoke->GetLocations()->Intrinsified()) {
2942 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2943 intrinsic.Dispatch(invoke);
2944 return true;
2945 }
2946 return false;
2947 }
2948
2949 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2950 // Explicit clinit checks triggered by static invokes must have been pruned by
2951 // art::PrepareForRegisterAllocation.
2952 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2953
2954 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2955 return;
2956 }
2957
2958 LocationSummary* locations = invoke->GetLocations();
2959 codegen_->GenerateStaticOrDirectCall(
2960 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2961 }
2962
2963 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2964 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2965 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2966 }
2967
2968 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2969 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2970 if (intrinsic.TryDispatch(invoke)) {
2971 return;
2972 }
2973
2974 HandleInvoke(invoke);
2975 }
2976
2977 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2978 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2979 return;
2980 }
2981
2982 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2983 DCHECK(!codegen_->IsLeafMethod());
2984 }
2985
2986 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2987 HandleInvoke(invoke);
2988 // Add the hidden argument.
2989 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2990 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2991 Location::RegisterLocation(RAX));
2992 }
2993 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2994 }
2995
2996 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2997 CpuRegister klass) {
2998 DCHECK_EQ(RDI, klass.AsRegister());
2999 // We know the destination of an intrinsic, so no need to record inline
3000 // caches.
3001 if (!instruction->GetLocations()->Intrinsified() &&
3002 GetGraph()->IsCompilingBaseline() &&
3003 !Runtime::Current()->IsAotCompiler()) {
3004 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3005 DCHECK(info != nullptr);
3006 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
3007 uint64_t address = reinterpret_cast64<uint64_t>(cache);
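    // Baseline inline-cache check: compare the receiver's class against the first
    // entry of this call site's InlineCache. On a match (the monomorphic fast path)
    // nothing needs to be recorded; otherwise the kQuickUpdateInlineCache entrypoint
    // is invoked so the class is recorded for later use by the optimizing compiler.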
3008 NearLabel done;
3009 __ movq(CpuRegister(TMP), Immediate(address));
3010 // Fast path for a monomorphic cache.
3011 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3012 __ j(kEqual, &done);
3013 GenerateInvokeRuntime(
3014 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3015 __ Bind(&done);
3016 }
3017 }
3018
3019 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3020 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3021 LocationSummary* locations = invoke->GetLocations();
3022 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3023 Location receiver = locations->InAt(0);
3024 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3025
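// Interface dispatch: load the receiver's class, follow its IMT pointer, index the
// ImTable slot for this interface method and call the entry point stored there.
// If that slot holds a conflict-resolution stub, the hidden argument in RAX (the
// interface method, or the IMT entry in the kRuntimeCall case) lets the stub or
// the runtime locate the actual target.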
3026 if (receiver.IsStackSlot()) {
3027 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3028 // /* HeapReference<Class> */ temp = temp->klass_
3029 __ movl(temp, Address(temp, class_offset));
3030 } else {
3031 // /* HeapReference<Class> */ temp = receiver->klass_
3032 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3033 }
3034 codegen_->MaybeRecordImplicitNullCheck(invoke);
3035 // Instead of simply (possibly) unpoisoning `temp` here, we should
3036 // emit a read barrier for the previous class reference load.
3037 // However this is not required in practice, as this is an
3038 // intermediate/temporary reference and because the current
3039 // concurrent copying collector keeps the from-space memory
3040 // intact/accessible until the end of the marking phase (a
3041 // future collector may not preserve this).
3042 __ MaybeUnpoisonHeapReference(temp);
3043
3044 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3045
3046 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3047 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3048 Location hidden_reg = locations->GetTemp(1);
3049 // Set the hidden argument. It is safe to do this here, as RAX
3050 // won't be modified thereafter, before the `call` instruction.
3051 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3052 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3053 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3054 }
3055
3056 // temp = temp->GetAddressOfIMT()
3057 __ movq(temp,
3058 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3060 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3061 invoke->GetImtIndex(), kX86_64PointerSize));
3062 // temp = temp->GetImtEntryAt(method_offset);
3063 __ movq(temp, Address(temp, method_offset));
3064 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3065 // We pass the method from the IMT in case of a conflict. This will ensure
3066 // we go into the runtime to resolve the actual method.
3067 Location hidden_reg = locations->GetTemp(1);
3068 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3069 }
3070 // call temp->GetEntryPoint();
3071 __ call(Address(
3072 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3073
3074 DCHECK(!codegen_->IsLeafMethod());
3075 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3076 }
3077
3078 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3079 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3080 if (intrinsic.TryDispatch(invoke)) {
3081 return;
3082 }
3083 HandleInvoke(invoke);
3084 }
3085
3086 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3087 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3088 return;
3089 }
3090 codegen_->GenerateInvokePolymorphicCall(invoke);
3091 }
3092
3093 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3094 HandleInvoke(invoke);
3095 }
3096
3097 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3098 codegen_->GenerateInvokeCustomCall(invoke);
3099 }
3100
3101 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3102 LocationSummary* locations =
3103 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3104 switch (neg->GetResultType()) {
3105 case DataType::Type::kInt32:
3106 case DataType::Type::kInt64:
3107 locations->SetInAt(0, Location::RequiresRegister());
3108 locations->SetOut(Location::SameAsFirstInput());
3109 break;
3110
3111 case DataType::Type::kFloat32:
3112 case DataType::Type::kFloat64:
3113 locations->SetInAt(0, Location::RequiresFpuRegister());
3114 locations->SetOut(Location::SameAsFirstInput());
3115 locations->AddTemp(Location::RequiresFpuRegister());
3116 break;
3117
3118 default:
3119 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3120 }
3121 }
3122
3123 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3124 LocationSummary* locations = neg->GetLocations();
3125 Location out = locations->Out();
3126 Location in = locations->InAt(0);
3127 switch (neg->GetResultType()) {
3128 case DataType::Type::kInt32:
3129 DCHECK(in.IsRegister());
3130 DCHECK(in.Equals(out));
3131 __ negl(out.AsRegister<CpuRegister>());
3132 break;
3133
3134 case DataType::Type::kInt64:
3135 DCHECK(in.IsRegister());
3136 DCHECK(in.Equals(out));
3137 __ negq(out.AsRegister<CpuRegister>());
3138 break;
3139
3140 case DataType::Type::kFloat32: {
3141 DCHECK(in.Equals(out));
3142 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3143 // Implement float negation with an exclusive or with value
3144 // 0x80000000 (mask for bit 31, representing the sign of a
3145 // single-precision floating-point number).
3146 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3147 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3148 break;
3149 }
3150
3151 case DataType::Type::kFloat64: {
3152 DCHECK(in.Equals(out));
3153 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3154 // Implement double negation with an exclusive or with value
3155 // 0x8000000000000000 (mask for bit 63, representing the sign of
3156 // a double-precision floating-point number).
3157 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3158 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3159 break;
3160 }
3161
3162 default:
3163 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3164 }
3165 }
3166
3167 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3168 LocationSummary* locations =
3169 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3170 DataType::Type result_type = conversion->GetResultType();
3171 DataType::Type input_type = conversion->GetInputType();
3172 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3173 << input_type << " -> " << result_type;
3174
3175 switch (result_type) {
3176 case DataType::Type::kUint8:
3177 case DataType::Type::kInt8:
3178 case DataType::Type::kUint16:
3179 case DataType::Type::kInt16:
3180 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3181 locations->SetInAt(0, Location::Any());
3182 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3183 break;
3184
3185 case DataType::Type::kInt32:
3186 switch (input_type) {
3187 case DataType::Type::kInt64:
3188 locations->SetInAt(0, Location::Any());
3189 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3190 break;
3191
3192 case DataType::Type::kFloat32:
3193 locations->SetInAt(0, Location::RequiresFpuRegister());
3194 locations->SetOut(Location::RequiresRegister());
3195 break;
3196
3197 case DataType::Type::kFloat64:
3198 locations->SetInAt(0, Location::RequiresFpuRegister());
3199 locations->SetOut(Location::RequiresRegister());
3200 break;
3201
3202 default:
3203 LOG(FATAL) << "Unexpected type conversion from " << input_type
3204 << " to " << result_type;
3205 }
3206 break;
3207
3208 case DataType::Type::kInt64:
3209 switch (input_type) {
3210 case DataType::Type::kBool:
3211 case DataType::Type::kUint8:
3212 case DataType::Type::kInt8:
3213 case DataType::Type::kUint16:
3214 case DataType::Type::kInt16:
3215 case DataType::Type::kInt32:
3216 // TODO: We would benefit from a (to-be-implemented)
3217 // Location::RegisterOrStackSlot requirement for this input.
3218 locations->SetInAt(0, Location::RequiresRegister());
3219 locations->SetOut(Location::RequiresRegister());
3220 break;
3221
3222 case DataType::Type::kFloat32:
3223 locations->SetInAt(0, Location::RequiresFpuRegister());
3224 locations->SetOut(Location::RequiresRegister());
3225 break;
3226
3227 case DataType::Type::kFloat64:
3228 locations->SetInAt(0, Location::RequiresFpuRegister());
3229 locations->SetOut(Location::RequiresRegister());
3230 break;
3231
3232 default:
3233 LOG(FATAL) << "Unexpected type conversion from " << input_type
3234 << " to " << result_type;
3235 }
3236 break;
3237
3238 case DataType::Type::kFloat32:
3239 switch (input_type) {
3240 case DataType::Type::kBool:
3241 case DataType::Type::kUint8:
3242 case DataType::Type::kInt8:
3243 case DataType::Type::kUint16:
3244 case DataType::Type::kInt16:
3245 case DataType::Type::kInt32:
3246 locations->SetInAt(0, Location::Any());
3247 locations->SetOut(Location::RequiresFpuRegister());
3248 break;
3249
3250 case DataType::Type::kInt64:
3251 locations->SetInAt(0, Location::Any());
3252 locations->SetOut(Location::RequiresFpuRegister());
3253 break;
3254
3255 case DataType::Type::kFloat64:
3256 locations->SetInAt(0, Location::Any());
3257 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3258 break;
3259
3260 default:
3261 LOG(FATAL) << "Unexpected type conversion from " << input_type
3262 << " to " << result_type;
3263 }
3264 break;
3265
3266 case DataType::Type::kFloat64:
3267 switch (input_type) {
3268 case DataType::Type::kBool:
3269 case DataType::Type::kUint8:
3270 case DataType::Type::kInt8:
3271 case DataType::Type::kUint16:
3272 case DataType::Type::kInt16:
3273 case DataType::Type::kInt32:
3274 locations->SetInAt(0, Location::Any());
3275 locations->SetOut(Location::RequiresFpuRegister());
3276 break;
3277
3278 case DataType::Type::kInt64:
3279 locations->SetInAt(0, Location::Any());
3280 locations->SetOut(Location::RequiresFpuRegister());
3281 break;
3282
3283 case DataType::Type::kFloat32:
3284 locations->SetInAt(0, Location::Any());
3285 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3286 break;
3287
3288 default:
3289 LOG(FATAL) << "Unexpected type conversion from " << input_type
3290 << " to " << result_type;
3291 }
3292 break;
3293
3294 default:
3295 LOG(FATAL) << "Unexpected type conversion from " << input_type
3296 << " to " << result_type;
3297 }
3298 }
3299
3300 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3301 LocationSummary* locations = conversion->GetLocations();
3302 Location out = locations->Out();
3303 Location in = locations->InAt(0);
3304 DataType::Type result_type = conversion->GetResultType();
3305 DataType::Type input_type = conversion->GetInputType();
3306 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3307 << input_type << " -> " << result_type;
3308 switch (result_type) {
3309 case DataType::Type::kUint8:
3310 switch (input_type) {
3311 case DataType::Type::kInt8:
3312 case DataType::Type::kUint16:
3313 case DataType::Type::kInt16:
3314 case DataType::Type::kInt32:
3315 case DataType::Type::kInt64:
3316 if (in.IsRegister()) {
3317 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3318 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3319 __ movzxb(out.AsRegister<CpuRegister>(),
3320 Address(CpuRegister(RSP), in.GetStackIndex()));
3321 } else {
3322 __ movl(out.AsRegister<CpuRegister>(),
3323 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3324 }
3325 break;
3326
3327 default:
3328 LOG(FATAL) << "Unexpected type conversion from " << input_type
3329 << " to " << result_type;
3330 }
3331 break;
3332
3333 case DataType::Type::kInt8:
3334 switch (input_type) {
3335 case DataType::Type::kUint8:
3336 case DataType::Type::kUint16:
3337 case DataType::Type::kInt16:
3338 case DataType::Type::kInt32:
3339 case DataType::Type::kInt64:
3340 if (in.IsRegister()) {
3341 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3342 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3343 __ movsxb(out.AsRegister<CpuRegister>(),
3344 Address(CpuRegister(RSP), in.GetStackIndex()));
3345 } else {
3346 __ movl(out.AsRegister<CpuRegister>(),
3347 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3348 }
3349 break;
3350
3351 default:
3352 LOG(FATAL) << "Unexpected type conversion from " << input_type
3353 << " to " << result_type;
3354 }
3355 break;
3356
3357 case DataType::Type::kUint16:
3358 switch (input_type) {
3359 case DataType::Type::kInt8:
3360 case DataType::Type::kInt16:
3361 case DataType::Type::kInt32:
3362 case DataType::Type::kInt64:
3363 if (in.IsRegister()) {
3364 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3365 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3366 __ movzxw(out.AsRegister<CpuRegister>(),
3367 Address(CpuRegister(RSP), in.GetStackIndex()));
3368 } else {
3369 __ movl(out.AsRegister<CpuRegister>(),
3370 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3371 }
3372 break;
3373
3374 default:
3375 LOG(FATAL) << "Unexpected type conversion from " << input_type
3376 << " to " << result_type;
3377 }
3378 break;
3379
3380 case DataType::Type::kInt16:
3381 switch (input_type) {
3382 case DataType::Type::kUint16:
3383 case DataType::Type::kInt32:
3384 case DataType::Type::kInt64:
3385 if (in.IsRegister()) {
3386 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3387 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3388 __ movsxw(out.AsRegister<CpuRegister>(),
3389 Address(CpuRegister(RSP), in.GetStackIndex()));
3390 } else {
3391 __ movl(out.AsRegister<CpuRegister>(),
3392 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3393 }
3394 break;
3395
3396 default:
3397 LOG(FATAL) << "Unexpected type conversion from " << input_type
3398 << " to " << result_type;
3399 }
3400 break;
3401
3402 case DataType::Type::kInt32:
3403 switch (input_type) {
3404 case DataType::Type::kInt64:
3405 if (in.IsRegister()) {
3406 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3407 } else if (in.IsDoubleStackSlot()) {
3408 __ movl(out.AsRegister<CpuRegister>(),
3409 Address(CpuRegister(RSP), in.GetStackIndex()));
3410 } else {
3411 DCHECK(in.IsConstant());
3412 DCHECK(in.GetConstant()->IsLongConstant());
3413 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3414 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3415 }
3416 break;
3417
3418 case DataType::Type::kFloat32: {
3419 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3420 CpuRegister output = out.AsRegister<CpuRegister>();
3421 NearLabel done, nan;
3422
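        // Java float-to-int semantics: NaN converts to 0, values too large for an
        // int saturate to INT_MAX, and negative overflow already matches the
        // 0x80000000 "integer indefinite" result of cvttss2si (== INT_MIN). Hence
        // only the NaN and positive-overflow cases need explicit handling here.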
3423 __ movl(output, Immediate(kPrimIntMax));
3424 // if input >= (float)INT_MAX goto done
3425 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3426 __ j(kAboveEqual, &done);
3427 // if input == NaN goto nan
3428 __ j(kUnordered, &nan);
3429 // output = float-to-int-truncate(input)
3430 __ cvttss2si(output, input, false);
3431 __ jmp(&done);
3432 __ Bind(&nan);
3433 // output = 0
3434 __ xorl(output, output);
3435 __ Bind(&done);
3436 break;
3437 }
3438
3439 case DataType::Type::kFloat64: {
3440 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3441 CpuRegister output = out.AsRegister<CpuRegister>();
3442 NearLabel done, nan;
3443
3444 __ movl(output, Immediate(kPrimIntMax));
3445 // if input >= (double)INT_MAX goto done
3446 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3447 __ j(kAboveEqual, &done);
3448 // if input == NaN goto nan
3449 __ j(kUnordered, &nan);
3450 // output = double-to-int-truncate(input)
3451 __ cvttsd2si(output, input);
3452 __ jmp(&done);
3453 __ Bind(&nan);
3454 // output = 0
3455 __ xorl(output, output);
3456 __ Bind(&done);
3457 break;
3458 }
3459
3460 default:
3461 LOG(FATAL) << "Unexpected type conversion from " << input_type
3462 << " to " << result_type;
3463 }
3464 break;
3465
3466 case DataType::Type::kInt64:
3467 DCHECK(out.IsRegister());
3468 switch (input_type) {
3469 case DataType::Type::kBool:
3470 case DataType::Type::kUint8:
3471 case DataType::Type::kInt8:
3472 case DataType::Type::kUint16:
3473 case DataType::Type::kInt16:
3474 case DataType::Type::kInt32:
3475 DCHECK(in.IsRegister());
3476 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3477 break;
3478
3479 case DataType::Type::kFloat32: {
3480 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3481 CpuRegister output = out.AsRegister<CpuRegister>();
3482 NearLabel done, nan;
3483
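        // Same saturating pattern as the Int32 conversions above, but using
        // kPrimLongMax and the 64-bit forms of cvttss2si/cvttsd2si.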
3484 codegen_->Load64BitValue(output, kPrimLongMax);
3485 // if input >= (float)LONG_MAX goto done
3486 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3487 __ j(kAboveEqual, &done);
3488 // if input == NaN goto nan
3489 __ j(kUnordered, &nan);
3490 // output = float-to-long-truncate(input)
3491 __ cvttss2si(output, input, true);
3492 __ jmp(&done);
3493 __ Bind(&nan);
3494 // output = 0
3495 __ xorl(output, output);
3496 __ Bind(&done);
3497 break;
3498 }
3499
3500 case DataType::Type::kFloat64: {
3501 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3502 CpuRegister output = out.AsRegister<CpuRegister>();
3503 NearLabel done, nan;
3504
3505 codegen_->Load64BitValue(output, kPrimLongMax);
3506 // if input >= (double)LONG_MAX goto done
3507 __ comisd(input, codegen_->LiteralDoubleAddress(
3508 static_cast<double>(kPrimLongMax)));
3509 __ j(kAboveEqual, &done);
3510 // if input == NaN goto nan
3511 __ j(kUnordered, &nan);
3512 // output = double-to-long-truncate(input)
3513 __ cvttsd2si(output, input, true);
3514 __ jmp(&done);
3515 __ Bind(&nan);
3516 // output = 0
3517 __ xorl(output, output);
3518 __ Bind(&done);
3519 break;
3520 }
3521
3522 default:
3523 LOG(FATAL) << "Unexpected type conversion from " << input_type
3524 << " to " << result_type;
3525 }
3526 break;
3527
3528 case DataType::Type::kFloat32:
3529 switch (input_type) {
3530 case DataType::Type::kBool:
3531 case DataType::Type::kUint8:
3532 case DataType::Type::kInt8:
3533 case DataType::Type::kUint16:
3534 case DataType::Type::kInt16:
3535 case DataType::Type::kInt32:
3536 if (in.IsRegister()) {
3537 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3538 } else if (in.IsConstant()) {
3539 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3540 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3541 codegen_->Load32BitValue(dest, static_cast<float>(v));
3542 } else {
3543 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3544 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3545 }
3546 break;
3547
3548 case DataType::Type::kInt64:
3549 if (in.IsRegister()) {
3550 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3551 } else if (in.IsConstant()) {
3552 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3553 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3554 codegen_->Load32BitValue(dest, static_cast<float>(v));
3555 } else {
3556 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3557 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3558 }
3559 break;
3560
3561 case DataType::Type::kFloat64:
3562 if (in.IsFpuRegister()) {
3563 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3564 } else if (in.IsConstant()) {
3565 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3566 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3567 codegen_->Load32BitValue(dest, static_cast<float>(v));
3568 } else {
3569 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3570 Address(CpuRegister(RSP), in.GetStackIndex()));
3571 }
3572 break;
3573
3574 default:
3575 LOG(FATAL) << "Unexpected type conversion from " << input_type
3576 << " to " << result_type;
3577 }
3578 break;
3579
3580 case DataType::Type::kFloat64:
3581 switch (input_type) {
3582 case DataType::Type::kBool:
3583 case DataType::Type::kUint8:
3584 case DataType::Type::kInt8:
3585 case DataType::Type::kUint16:
3586 case DataType::Type::kInt16:
3587 case DataType::Type::kInt32:
3588 if (in.IsRegister()) {
3589 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3590 } else if (in.IsConstant()) {
3591 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3592 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3593 codegen_->Load64BitValue(dest, static_cast<double>(v));
3594 } else {
3595 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3596 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3597 }
3598 break;
3599
3600 case DataType::Type::kInt64:
3601 if (in.IsRegister()) {
3602 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3603 } else if (in.IsConstant()) {
3604 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3605 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3606 codegen_->Load64BitValue(dest, static_cast<double>(v));
3607 } else {
3608 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3609 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3610 }
3611 break;
3612
3613 case DataType::Type::kFloat32:
3614 if (in.IsFpuRegister()) {
3615 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3616 } else if (in.IsConstant()) {
3617 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3618 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3619 codegen_->Load64BitValue(dest, static_cast<double>(v));
3620 } else {
3621 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3622 Address(CpuRegister(RSP), in.GetStackIndex()));
3623 }
3624 break;
3625
3626 default:
3627 LOG(FATAL) << "Unexpected type conversion from " << input_type
3628 << " to " << result_type;
3629 }
3630 break;
3631
3632 default:
3633 LOG(FATAL) << "Unexpected type conversion from " << input_type
3634 << " to " << result_type;
3635 }
3636 }
3637
3638 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3639 LocationSummary* locations =
3640 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3641 switch (add->GetResultType()) {
3642 case DataType::Type::kInt32: {
3643 locations->SetInAt(0, Location::RequiresRegister());
3644 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3645 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3646 break;
3647 }
3648
3649 case DataType::Type::kInt64: {
3650 locations->SetInAt(0, Location::RequiresRegister());
3651 // We can use a leaq or addq if the constant can fit in an immediate.
3652 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3653 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3654 break;
3655 }
3656
3657 case DataType::Type::kFloat64:
3658 case DataType::Type::kFloat32: {
3659 locations->SetInAt(0, Location::RequiresFpuRegister());
3660 locations->SetInAt(1, Location::Any());
3661 locations->SetOut(Location::SameAsFirstInput());
3662 break;
3663 }
3664
3665 default:
3666 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3667 }
3668 }
3669
3670 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3671 LocationSummary* locations = add->GetLocations();
3672 Location first = locations->InAt(0);
3673 Location second = locations->InAt(1);
3674 Location out = locations->Out();
3675
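// For integer adds the output register is not required to alias either input:
// when it matches one of them a plain addl/addq is used, otherwise leal/leaq
// serves as a non-destructive three-operand add. FP adds always overwrite the
// first input.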
3676 switch (add->GetResultType()) {
3677 case DataType::Type::kInt32: {
3678 if (second.IsRegister()) {
3679 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3680 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3681 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3682 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3683 } else {
3684 __ leal(out.AsRegister<CpuRegister>(), Address(
3685 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3686 }
3687 } else if (second.IsConstant()) {
3688 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3689 __ addl(out.AsRegister<CpuRegister>(),
3690 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3691 } else {
3692 __ leal(out.AsRegister<CpuRegister>(), Address(
3693 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3694 }
3695 } else {
3696 DCHECK(first.Equals(locations->Out()));
3697 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3698 }
3699 break;
3700 }
3701
3702 case DataType::Type::kInt64: {
3703 if (second.IsRegister()) {
3704 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3705 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3706 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3707 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3708 } else {
3709 __ leaq(out.AsRegister<CpuRegister>(), Address(
3710 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3711 }
3712 } else {
3713 DCHECK(second.IsConstant());
3714 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3715 int32_t int32_value = Low32Bits(value);
3716 DCHECK_EQ(int32_value, value);
3717 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3718 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3719 } else {
3720 __ leaq(out.AsRegister<CpuRegister>(), Address(
3721 first.AsRegister<CpuRegister>(), int32_value));
3722 }
3723 }
3724 break;
3725 }
3726
3727 case DataType::Type::kFloat32: {
3728 if (second.IsFpuRegister()) {
3729 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3730 } else if (second.IsConstant()) {
3731 __ addss(first.AsFpuRegister<XmmRegister>(),
3732 codegen_->LiteralFloatAddress(
3733 second.GetConstant()->AsFloatConstant()->GetValue()));
3734 } else {
3735 DCHECK(second.IsStackSlot());
3736 __ addss(first.AsFpuRegister<XmmRegister>(),
3737 Address(CpuRegister(RSP), second.GetStackIndex()));
3738 }
3739 break;
3740 }
3741
3742 case DataType::Type::kFloat64: {
3743 if (second.IsFpuRegister()) {
3744 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3745 } else if (second.IsConstant()) {
3746 __ addsd(first.AsFpuRegister<XmmRegister>(),
3747 codegen_->LiteralDoubleAddress(
3748 second.GetConstant()->AsDoubleConstant()->GetValue()));
3749 } else {
3750 DCHECK(second.IsDoubleStackSlot());
3751 __ addsd(first.AsFpuRegister<XmmRegister>(),
3752 Address(CpuRegister(RSP), second.GetStackIndex()));
3753 }
3754 break;
3755 }
3756
3757 default:
3758 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3759 }
3760 }
3761
3762 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3763 LocationSummary* locations =
3764 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3765 switch (sub->GetResultType()) {
3766 case DataType::Type::kInt32: {
3767 locations->SetInAt(0, Location::RequiresRegister());
3768 locations->SetInAt(1, Location::Any());
3769 locations->SetOut(Location::SameAsFirstInput());
3770 break;
3771 }
3772 case DataType::Type::kInt64: {
3773 locations->SetInAt(0, Location::RequiresRegister());
3774 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3775 locations->SetOut(Location::SameAsFirstInput());
3776 break;
3777 }
3778 case DataType::Type::kFloat32:
3779 case DataType::Type::kFloat64: {
3780 locations->SetInAt(0, Location::RequiresFpuRegister());
3781 locations->SetInAt(1, Location::Any());
3782 locations->SetOut(Location::SameAsFirstInput());
3783 break;
3784 }
3785 default:
3786 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3787 }
3788 }
3789
3790 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3791 LocationSummary* locations = sub->GetLocations();
3792 Location first = locations->InAt(0);
3793 Location second = locations->InAt(1);
3794 DCHECK(first.Equals(locations->Out()));
3795 switch (sub->GetResultType()) {
3796 case DataType::Type::kInt32: {
3797 if (second.IsRegister()) {
3798 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3799 } else if (second.IsConstant()) {
3800 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3801 __ subl(first.AsRegister<CpuRegister>(), imm);
3802 } else {
3803 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3804 }
3805 break;
3806 }
3807 case DataType::Type::kInt64: {
3808 if (second.IsConstant()) {
3809 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3810 DCHECK(IsInt<32>(value));
3811 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3812 } else {
3813 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3814 }
3815 break;
3816 }
3817
3818 case DataType::Type::kFloat32: {
3819 if (second.IsFpuRegister()) {
3820 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3821 } else if (second.IsConstant()) {
3822 __ subss(first.AsFpuRegister<XmmRegister>(),
3823 codegen_->LiteralFloatAddress(
3824 second.GetConstant()->AsFloatConstant()->GetValue()));
3825 } else {
3826 DCHECK(second.IsStackSlot());
3827 __ subss(first.AsFpuRegister<XmmRegister>(),
3828 Address(CpuRegister(RSP), second.GetStackIndex()));
3829 }
3830 break;
3831 }
3832
3833 case DataType::Type::kFloat64: {
3834 if (second.IsFpuRegister()) {
3835 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3836 } else if (second.IsConstant()) {
3837 __ subsd(first.AsFpuRegister<XmmRegister>(),
3838 codegen_->LiteralDoubleAddress(
3839 second.GetConstant()->AsDoubleConstant()->GetValue()));
3840 } else {
3841 DCHECK(second.IsDoubleStackSlot());
3842 __ subsd(first.AsFpuRegister<XmmRegister>(),
3843 Address(CpuRegister(RSP), second.GetStackIndex()));
3844 }
3845 break;
3846 }
3847
3848 default:
3849 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3850 }
3851 }
3852
3853 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3854 LocationSummary* locations =
3855 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3856 switch (mul->GetResultType()) {
3857 case DataType::Type::kInt32: {
3858 locations->SetInAt(0, Location::RequiresRegister());
3859 locations->SetInAt(1, Location::Any());
3860 if (mul->InputAt(1)->IsIntConstant()) {
3861 // Can use 3 operand multiply.
3862 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3863 } else {
3864 locations->SetOut(Location::SameAsFirstInput());
3865 }
3866 break;
3867 }
3868 case DataType::Type::kInt64: {
3869 locations->SetInAt(0, Location::RequiresRegister());
3870 locations->SetInAt(1, Location::Any());
3871 if (mul->InputAt(1)->IsLongConstant() &&
3872 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3873 // Can use 3 operand multiply.
3874 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3875 } else {
3876 locations->SetOut(Location::SameAsFirstInput());
3877 }
3878 break;
3879 }
3880 case DataType::Type::kFloat32:
3881 case DataType::Type::kFloat64: {
3882 locations->SetInAt(0, Location::RequiresFpuRegister());
3883 locations->SetInAt(1, Location::Any());
3884 locations->SetOut(Location::SameAsFirstInput());
3885 break;
3886 }
3887
3888 default:
3889 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3890 }
3891 }
3892
3893 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3894 LocationSummary* locations = mul->GetLocations();
3895 Location first = locations->InAt(0);
3896 Location second = locations->InAt(1);
3897 Location out = locations->Out();
3898 switch (mul->GetResultType()) {
3899 case DataType::Type::kInt32:
3900 // The constant may have ended up in a register, so test explicitly to avoid
3901 // problems where the output may not be the same as the first operand.
3902 if (mul->InputAt(1)->IsIntConstant()) {
3903 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3904 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3905 } else if (second.IsRegister()) {
3906 DCHECK(first.Equals(out));
3907 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3908 } else {
3909 DCHECK(first.Equals(out));
3910 DCHECK(second.IsStackSlot());
3911 __ imull(first.AsRegister<CpuRegister>(),
3912 Address(CpuRegister(RSP), second.GetStackIndex()));
3913 }
3914 break;
3915 case DataType::Type::kInt64: {
3916 // The constant may have ended up in a register, so test explicitly to avoid
3917 // problems where the output may not be the same as the first operand.
3918 if (mul->InputAt(1)->IsLongConstant()) {
3919 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3920 if (IsInt<32>(value)) {
3921 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3922 Immediate(static_cast<int32_t>(value)));
3923 } else {
3924 // Have to use the constant area.
3925 DCHECK(first.Equals(out));
3926 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3927 }
3928 } else if (second.IsRegister()) {
3929 DCHECK(first.Equals(out));
3930 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3931 } else {
3932 DCHECK(second.IsDoubleStackSlot());
3933 DCHECK(first.Equals(out));
3934 __ imulq(first.AsRegister<CpuRegister>(),
3935 Address(CpuRegister(RSP), second.GetStackIndex()));
3936 }
3937 break;
3938 }
3939
3940 case DataType::Type::kFloat32: {
3941 DCHECK(first.Equals(out));
3942 if (second.IsFpuRegister()) {
3943 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3944 } else if (second.IsConstant()) {
3945 __ mulss(first.AsFpuRegister<XmmRegister>(),
3946 codegen_->LiteralFloatAddress(
3947 second.GetConstant()->AsFloatConstant()->GetValue()));
3948 } else {
3949 DCHECK(second.IsStackSlot());
3950 __ mulss(first.AsFpuRegister<XmmRegister>(),
3951 Address(CpuRegister(RSP), second.GetStackIndex()));
3952 }
3953 break;
3954 }
3955
3956 case DataType::Type::kFloat64: {
3957 DCHECK(first.Equals(out));
3958 if (second.IsFpuRegister()) {
3959 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3960 } else if (second.IsConstant()) {
3961 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3962 codegen_->LiteralDoubleAddress(
3963 second.GetConstant()->AsDoubleConstant()->GetValue()));
3964 } else {
3965 DCHECK(second.IsDoubleStackSlot());
3966 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3967 Address(CpuRegister(RSP), second.GetStackIndex()));
3968 }
3969 break;
3970 }
3971
3972 default:
3973 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3974 }
3975 }
3976
3977 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3978 uint32_t stack_adjustment, bool is_float) {
3979 if (source.IsStackSlot()) {
3980 DCHECK(is_float);
3981 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3982 } else if (source.IsDoubleStackSlot()) {
3983 DCHECK(!is_float);
3984 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3985 } else {
3986 // Write the value to the temporary location on the stack and load to FP stack.
3987 if (is_float) {
3988 Location stack_temp = Location::StackSlot(temp_offset);
3989 codegen_->Move(stack_temp, source);
3990 __ flds(Address(CpuRegister(RSP), temp_offset));
3991 } else {
3992 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3993 codegen_->Move(stack_temp, source);
3994 __ fldl(Address(CpuRegister(RSP), temp_offset));
3995 }
3996 }
3997 }
3998
3999 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
4000 DataType::Type type = rem->GetResultType();
4001 bool is_float = type == DataType::Type::kFloat32;
4002 size_t elem_size = DataType::Size(type);
4003 LocationSummary* locations = rem->GetLocations();
4004 Location first = locations->InAt(0);
4005 Location second = locations->InAt(1);
4006 Location out = locations->Out();
4007
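// Java's % on float/double follows fmod semantics (remainder with the sign of the
// dividend), and SSE has no instruction for it, so this routine drops to the x87
// FPU: push both operands, run fprem until the C2 "incomplete reduction" flag
// clears, then move the result back to an XMM register through memory.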
4008 // Create stack space for 2 elements.
4009 // TODO: enhance register allocator to ask for stack temporaries.
4010 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4011
4012 // Load the values to the FP stack in reverse order, using temporaries if needed.
4013 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4014 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
4015
4016 // Loop doing FPREM until we stabilize.
4017 NearLabel retry;
4018 __ Bind(&retry);
4019 __ fprem();
4020
4021 // Move FP status to AX.
4022 __ fstsw();
4023
4024 // Check whether the argument reduction is complete; this is signaled by the
4025 // C2 FPU flag bit being set to 0.
4026 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4027 __ j(kNotEqual, &retry);
4028
4029 // We have settled on the final value. Retrieve it into an XMM register.
4030 // Store FP top of stack to real stack.
4031 if (is_float) {
4032 __ fsts(Address(CpuRegister(RSP), 0));
4033 } else {
4034 __ fstl(Address(CpuRegister(RSP), 0));
4035 }
4036
4037 // Pop the 2 items from the FP stack.
4038 __ fucompp();
4039
4040 // Load the value from the stack into an XMM register.
4041 DCHECK(out.IsFpuRegister()) << out;
4042 if (is_float) {
4043 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4044 } else {
4045 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4046 }
4047
4048 // And remove the temporary stack space we allocated.
4049 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4050 }
4051
4052 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4053 DCHECK(instruction->IsDiv() || instruction->IsRem());
4054
4055 LocationSummary* locations = instruction->GetLocations();
4056 Location second = locations->InAt(1);
4057 DCHECK(second.IsConstant());
4058
4059 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4060 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4061 int64_t imm = Int64FromConstant(second.GetConstant());
4062
4063 DCHECK(imm == 1 || imm == -1);
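  // x / 1 == x, x / -1 == -x (negating INT_MIN/LONG_MIN wraps around, matching
  // Java overflow semantics), and x % +/-1 == 0, so no idiv is needed.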
4064
4065 switch (instruction->GetResultType()) {
4066 case DataType::Type::kInt32: {
4067 if (instruction->IsRem()) {
4068 __ xorl(output_register, output_register);
4069 } else {
4070 __ movl(output_register, input_register);
4071 if (imm == -1) {
4072 __ negl(output_register);
4073 }
4074 }
4075 break;
4076 }
4077
4078 case DataType::Type::kInt64: {
4079 if (instruction->IsRem()) {
4080 __ xorl(output_register, output_register);
4081 } else {
4082 __ movq(output_register, input_register);
4083 if (imm == -1) {
4084 __ negq(output_register);
4085 }
4086 }
4087 break;
4088 }
4089
4090 default:
4091 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4092 }
4093 }
4094 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4095 LocationSummary* locations = instruction->GetLocations();
4096 Location second = locations->InAt(1);
4097 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4098 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4099 int64_t imm = Int64FromConstant(second.GetConstant());
4100 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4101 uint64_t abs_imm = AbsOrMin(imm);
4102 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
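  // For a non-negative numerator, n % 2^k is simply n & (2^k - 1). For a negative
  // numerator with a non-zero masked value, Java requires a remainder with the
  // sign of the dividend, so the masked value is adjusted by -2^k (the lea with
  // ~(abs_imm - 1) below, or the sign-extended high bits in the 64-bit path).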
4103 if (instruction->GetResultType() == DataType::Type::kInt32) {
4104 NearLabel done;
4105 __ movl(out, numerator);
4106 __ andl(out, Immediate(abs_imm-1));
4107 __ j(Condition::kZero, &done);
4108 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4109 __ testl(numerator, numerator);
4110 __ cmov(Condition::kLess, out, tmp, false);
4111 __ Bind(&done);
4112
4113 } else {
4114 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4115 codegen_->Load64BitValue(tmp, abs_imm - 1);
4116 NearLabel done;
4117
4118 __ movq(out, numerator);
4119 __ andq(out, tmp);
4120 __ j(Condition::kZero, &done);
4121 __ movq(tmp, numerator);
4122 __ sarq(tmp, Immediate(63));
4123 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4124 __ orq(out, tmp);
4125 __ Bind(&done);
4126 }
4127 }
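// Illustrative sketch (comment only) of the remainder-by-power-of-two sequence above, written as
// a hypothetical helper with divisor magnitude d == 1 << k (the divisor's sign does not affect a
// truncated remainder):
//
//   int32_t RemPowerOfTwo(int32_t n, int32_t d) {
//     int32_t r = n & (d - 1);               // unsigned residue in [0, d)
//     return (r != 0 && n < 0) ? r - d : r;  // give the result the sign of the numerator
//   }
//
// e.g. n == -7, d == 4: r == (-7) & 3 == 1; n < 0, so the result is 1 - 4 == -3 == -7 % 4.
// The leal/cmov pair above performs the conditional "r - d" without a branch; the 64-bit path
// builds the same correction with sarq/shlq/orq.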
DivByPowerOfTwo(HDiv * instruction)4128 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4129 LocationSummary* locations = instruction->GetLocations();
4130 Location second = locations->InAt(1);
4131
4132 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4133 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4134
4135 int64_t imm = Int64FromConstant(second.GetConstant());
4136 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4137 uint64_t abs_imm = AbsOrMin(imm);
4138
4139 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4140
4141 if (instruction->GetResultType() == DataType::Type::kInt32) {
4142 // When the denominator is equal to 2, we can simply add the sign bit and the numerator into tmp.
4143 // Below we use an addl instruction instead of cmov, which gives us a 1-cycle benefit.
4144 if (abs_imm == 2) {
4145 __ leal(tmp, Address(numerator, 0));
4146 __ shrl(tmp, Immediate(31));
4147 __ addl(tmp, numerator);
4148 } else {
4149 __ leal(tmp, Address(numerator, abs_imm - 1));
4150 __ testl(numerator, numerator);
4151 __ cmov(kGreaterEqual, tmp, numerator);
4152 }
4153 int shift = CTZ(imm);
4154 __ sarl(tmp, Immediate(shift));
4155
4156 if (imm < 0) {
4157 __ negl(tmp);
4158 }
4159
4160 __ movl(output_register, tmp);
4161 } else {
4162 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4163 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4164 if (abs_imm == 2) {
4165 __ movq(rdx, numerator);
4166 __ shrq(rdx, Immediate(63));
4167 __ addq(rdx, numerator);
4168 } else {
4169 codegen_->Load64BitValue(rdx, abs_imm - 1);
4170 __ addq(rdx, numerator);
4171 __ testq(numerator, numerator);
4172 __ cmov(kGreaterEqual, rdx, numerator);
4173 }
4174 int shift = CTZ(imm);
4175 __ sarq(rdx, Immediate(shift));
4176
4177 if (imm < 0) {
4178 __ negq(rdx);
4179 }
4180
4181 __ movq(output_register, rdx);
4182 }
4183 }
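// Illustrative sketch (comment only) of the biased-shift division generated above, as a
// hypothetical helper with divisor magnitude d == 1 << k (assuming arithmetic right shifts on
// signed values):
//
//   int32_t DivPowerOfTwo(int32_t n, int k) {
//     int32_t bias = (n < 0) ? (1 << k) - 1 : 0;  // the cmov, or the sign bit itself when d == 2
//     return (n + bias) >> k;                     // the shift now truncates toward zero
//   }
//
// An arithmetic shift alone rounds toward negative infinity: -7 >> 2 == -2, whereas -7 / 4 must
// be -1; with the bias, (-7 + 3) >> 2 == -4 >> 2 == -1. A negative divisor is handled by negating
// the quotient afterwards (the negl/negq above).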
4184
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4185 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4186 DCHECK(instruction->IsDiv() || instruction->IsRem());
4187
4188 LocationSummary* locations = instruction->GetLocations();
4189 Location second = locations->InAt(1);
4190
4191 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4192 : locations->GetTemp(0).AsRegister<CpuRegister>();
4193 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4194 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4195 : locations->Out().AsRegister<CpuRegister>();
4196 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4197
4198 DCHECK_EQ(RAX, eax.AsRegister());
4199 DCHECK_EQ(RDX, edx.AsRegister());
4200 if (instruction->IsDiv()) {
4201 DCHECK_EQ(RAX, out.AsRegister());
4202 } else {
4203 DCHECK_EQ(RDX, out.AsRegister());
4204 }
4205
4206 int64_t magic;
4207 int shift;
4208
4209 // TODO: can these branches be written as one?
4210 if (instruction->GetResultType() == DataType::Type::kInt32) {
4211 int imm = second.GetConstant()->AsIntConstant()->GetValue();
4212
4213 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4214
4215 __ movl(numerator, eax);
4216
4217 __ movl(eax, Immediate(magic));
4218 __ imull(numerator);
4219
4220 if (imm > 0 && magic < 0) {
4221 __ addl(edx, numerator);
4222 } else if (imm < 0 && magic > 0) {
4223 __ subl(edx, numerator);
4224 }
4225
4226 if (shift != 0) {
4227 __ sarl(edx, Immediate(shift));
4228 }
4229
4230 __ movl(eax, edx);
4231 __ shrl(edx, Immediate(31));
4232 __ addl(edx, eax);
4233
4234 if (instruction->IsRem()) {
4235 __ movl(eax, numerator);
4236 __ imull(edx, Immediate(imm));
4237 __ subl(eax, edx);
4238 __ movl(edx, eax);
4239 } else {
4240 __ movl(eax, edx);
4241 }
4242 } else {
4243 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4244
4245 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4246
4247 CpuRegister rax = eax;
4248 CpuRegister rdx = edx;
4249
4250 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4251
4252 // Save the numerator.
4253 __ movq(numerator, rax);
4254
4255 // RAX = magic
4256 codegen_->Load64BitValue(rax, magic);
4257
4258 // RDX:RAX = magic * numerator
4259 __ imulq(numerator);
4260
4261 if (imm > 0 && magic < 0) {
4262 // RDX += numerator
4263 __ addq(rdx, numerator);
4264 } else if (imm < 0 && magic > 0) {
4265 // RDX -= numerator
4266 __ subq(rdx, numerator);
4267 }
4268
4269 // Shift if needed.
4270 if (shift != 0) {
4271 __ sarq(rdx, Immediate(shift));
4272 }
4273
4274 // RDX += 1 if RDX < 0
4275 __ movq(rax, rdx);
4276 __ shrq(rdx, Immediate(63));
4277 __ addq(rdx, rax);
4278
4279 if (instruction->IsRem()) {
4280 __ movq(rax, numerator);
4281
4282 if (IsInt<32>(imm)) {
4283 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4284 } else {
4285 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4286 }
4287
4288 __ subq(rax, rdx);
4289 __ movq(rdx, rax);
4290 } else {
4291 __ movq(rax, rdx);
4292 }
4293 }
4294 }
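// Illustrative note (comment only): this is the classic "magic number" division of
// Granlund & Montgomery (see also Hacker's Delight, chapter 10). The quotient is the high half of
// the full-width product n * magic, corrected by +/- n when the divisor and the magic constant
// have opposite signs, arithmetically shifted, and finally incremented by its own sign bit so the
// result truncates toward zero; the remainder is then recovered as n - q * divisor. For example,
// the standard 32-bit constants for a divisor of 7 are magic == 0x92492493 and shift == 2, so for
// n == 100:
//
//   high32(100 * 0x92492493) == -43   // imull leaves the high half of the signed product in EDX
//   -43 + 100 == 57                   // divisor > 0 and magic < 0, so add the numerator
//   57 >> 2 == 14                     // arithmetic shift
//   14 + 0 == 14                      // add the sign bit (0 here); indeed 100 / 7 == 14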
4295
GenerateDivRemIntegral(HBinaryOperation * instruction)4296 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4297 DCHECK(instruction->IsDiv() || instruction->IsRem());
4298 DataType::Type type = instruction->GetResultType();
4299 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4300
4301 bool is_div = instruction->IsDiv();
4302 LocationSummary* locations = instruction->GetLocations();
4303
4304 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4305 Location second = locations->InAt(1);
4306
4307 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4308 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4309
4310 if (second.IsConstant()) {
4311 int64_t imm = Int64FromConstant(second.GetConstant());
4312
4313 if (imm == 0) {
4314 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4315 } else if (imm == 1 || imm == -1) {
4316 DivRemOneOrMinusOne(instruction);
4317 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4318 if (is_div) {
4319 DivByPowerOfTwo(instruction->AsDiv());
4320 } else {
4321 RemByPowerOfTwo(instruction->AsRem());
4322 }
4323 } else {
4324 DCHECK(imm <= -2 || imm >= 2);
4325 GenerateDivRemWithAnyConstant(instruction);
4326 }
4327 } else {
4328 SlowPathCode* slow_path =
4329 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4330 instruction, out.AsRegister(), type, is_div);
4331 codegen_->AddSlowPath(slow_path);
4332
4333 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4334 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4335 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
4336 // so it's safe to just use negl instead of more complex comparisons.
4337 if (type == DataType::Type::kInt32) {
4338 __ cmpl(second_reg, Immediate(-1));
4339 __ j(kEqual, slow_path->GetEntryLabel());
4340 // edx:eax <- sign extension of eax
4341 __ cdq();
4342 // eax = quotient, edx = remainder
4343 __ idivl(second_reg);
4344 } else {
4345 __ cmpq(second_reg, Immediate(-1));
4346 __ j(kEqual, slow_path->GetEntryLabel());
4347 // rdx:rax <- sign extension of rax
4348 __ cqo();
4349 // rax = quotient, rdx = remainder
4350 __ idivq(second_reg);
4351 }
4352 __ Bind(slow_path->GetExitLabel());
4353 }
4354 }
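// Illustrative note (comment only): the slow path above exists because idiv raises a #DE fault
// not only for a zero divisor (handled separately via HDivZeroCheck) but also for
// INT_MIN / -1 and LONG_MIN / -1, whose true quotient does not fit in the destination. The
// expected result of that overflow is MIN_VALUE with remainder 0, so a divisor of -1 is diverted
// to DivRemMinusOneSlowPathX86_64, where the result can be produced directly (negation for div,
// zero for rem) without executing idiv.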
4355
VisitDiv(HDiv * div)4356 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4357 LocationSummary* locations =
4358 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4359 switch (div->GetResultType()) {
4360 case DataType::Type::kInt32:
4361 case DataType::Type::kInt64: {
4362 locations->SetInAt(0, Location::RegisterLocation(RAX));
4363 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4364 locations->SetOut(Location::SameAsFirstInput());
4365 // Intel uses edx:eax as the dividend.
4366 locations->AddTemp(Location::RegisterLocation(RDX));
4367 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4368 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4369 // output and request another temp.
4370 if (div->InputAt(1)->IsConstant()) {
4371 locations->AddTemp(Location::RequiresRegister());
4372 }
4373 break;
4374 }
4375
4376 case DataType::Type::kFloat32:
4377 case DataType::Type::kFloat64: {
4378 locations->SetInAt(0, Location::RequiresFpuRegister());
4379 locations->SetInAt(1, Location::Any());
4380 locations->SetOut(Location::SameAsFirstInput());
4381 break;
4382 }
4383
4384 default:
4385 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4386 }
4387 }
4388
VisitDiv(HDiv * div)4389 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4390 LocationSummary* locations = div->GetLocations();
4391 Location first = locations->InAt(0);
4392 Location second = locations->InAt(1);
4393 DCHECK(first.Equals(locations->Out()));
4394
4395 DataType::Type type = div->GetResultType();
4396 switch (type) {
4397 case DataType::Type::kInt32:
4398 case DataType::Type::kInt64: {
4399 GenerateDivRemIntegral(div);
4400 break;
4401 }
4402
4403 case DataType::Type::kFloat32: {
4404 if (second.IsFpuRegister()) {
4405 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4406 } else if (second.IsConstant()) {
4407 __ divss(first.AsFpuRegister<XmmRegister>(),
4408 codegen_->LiteralFloatAddress(
4409 second.GetConstant()->AsFloatConstant()->GetValue()));
4410 } else {
4411 DCHECK(second.IsStackSlot());
4412 __ divss(first.AsFpuRegister<XmmRegister>(),
4413 Address(CpuRegister(RSP), second.GetStackIndex()));
4414 }
4415 break;
4416 }
4417
4418 case DataType::Type::kFloat64: {
4419 if (second.IsFpuRegister()) {
4420 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4421 } else if (second.IsConstant()) {
4422 __ divsd(first.AsFpuRegister<XmmRegister>(),
4423 codegen_->LiteralDoubleAddress(
4424 second.GetConstant()->AsDoubleConstant()->GetValue()));
4425 } else {
4426 DCHECK(second.IsDoubleStackSlot());
4427 __ divsd(first.AsFpuRegister<XmmRegister>(),
4428 Address(CpuRegister(RSP), second.GetStackIndex()));
4429 }
4430 break;
4431 }
4432
4433 default:
4434 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4435 }
4436 }
4437
VisitRem(HRem * rem)4438 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4439 DataType::Type type = rem->GetResultType();
4440 LocationSummary* locations =
4441 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4442
4443 switch (type) {
4444 case DataType::Type::kInt32:
4445 case DataType::Type::kInt64: {
4446 locations->SetInAt(0, Location::RegisterLocation(RAX));
4447 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4448 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4449 locations->SetOut(Location::RegisterLocation(RDX));
4450 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4451 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4452 // output and request another temp.
4453 if (rem->InputAt(1)->IsConstant()) {
4454 locations->AddTemp(Location::RequiresRegister());
4455 }
4456 break;
4457 }
4458
4459 case DataType::Type::kFloat32:
4460 case DataType::Type::kFloat64: {
4461 locations->SetInAt(0, Location::Any());
4462 locations->SetInAt(1, Location::Any());
4463 locations->SetOut(Location::RequiresFpuRegister());
4464 locations->AddTemp(Location::RegisterLocation(RAX));
4465 break;
4466 }
4467
4468 default:
4469 LOG(FATAL) << "Unexpected rem type " << type;
4470 }
4471 }
4472
VisitRem(HRem * rem)4473 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4474 DataType::Type type = rem->GetResultType();
4475 switch (type) {
4476 case DataType::Type::kInt32:
4477 case DataType::Type::kInt64: {
4478 GenerateDivRemIntegral(rem);
4479 break;
4480 }
4481 case DataType::Type::kFloat32:
4482 case DataType::Type::kFloat64: {
4483 GenerateRemFP(rem);
4484 break;
4485 }
4486 default:
4487 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4488 }
4489 }
4490
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4491 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4492 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4493 switch (minmax->GetResultType()) {
4494 case DataType::Type::kInt32:
4495 case DataType::Type::kInt64:
4496 locations->SetInAt(0, Location::RequiresRegister());
4497 locations->SetInAt(1, Location::RequiresRegister());
4498 locations->SetOut(Location::SameAsFirstInput());
4499 break;
4500 case DataType::Type::kFloat32:
4501 case DataType::Type::kFloat64:
4502 locations->SetInAt(0, Location::RequiresFpuRegister());
4503 locations->SetInAt(1, Location::RequiresFpuRegister());
4504 // The following is sub-optimal, but it is all we can do for now. It would be fine to also
4505 // allow the second input to be the output (we could simply swap the inputs).
4506 locations->SetOut(Location::SameAsFirstInput());
4507 break;
4508 default:
4509 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4510 }
4511 }
4512
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4513 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4514 bool is_min,
4515 DataType::Type type) {
4516 Location op1_loc = locations->InAt(0);
4517 Location op2_loc = locations->InAt(1);
4518
4519 // Shortcut for same input locations.
4520 if (op1_loc.Equals(op2_loc)) {
4521 // Can return immediately, as op1_loc == out_loc.
4522 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4523 // a copy here.
4524 DCHECK(locations->Out().Equals(op1_loc));
4525 return;
4526 }
4527
4528 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4529 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4530
4531 // (out := op1)
4532 // out <=? op2
4533 // if out is min jmp done
4534 // out := op2
4535 // done:
4536
4537 if (type == DataType::Type::kInt64) {
4538 __ cmpq(out, op2);
4539 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4540 } else {
4541 DCHECK_EQ(type, DataType::Type::kInt32);
4542 __ cmpl(out, op2);
4543 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4544 }
4545 }
4546
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4547 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4548 bool is_min,
4549 DataType::Type type) {
4550 Location op1_loc = locations->InAt(0);
4551 Location op2_loc = locations->InAt(1);
4552 Location out_loc = locations->Out();
4553 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4554
4555 // Shortcut for same input locations.
4556 if (op1_loc.Equals(op2_loc)) {
4557 DCHECK(out_loc.Equals(op1_loc));
4558 return;
4559 }
4560
4561 // (out := op1)
4562 // out <=? op2
4563 // if NaN jmp NaN_label
4564 // if out is min jmp done
4565 // if op2 is min jmp op2_label
4566 // handle -0/+0
4567 // jmp done
4568 // NaN_label:
4569 // out := NaN
4570 // op2_label:
4571 // out := op2
4572 // done:
4573 //
4574 // This removes one jmp, but needs to copy one input (op1) to out.
4575 //
4576 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4577
4578 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4579
4580 NearLabel nan, done, op2_label;
4581 if (type == DataType::Type::kFloat64) {
4582 __ ucomisd(out, op2);
4583 } else {
4584 DCHECK_EQ(type, DataType::Type::kFloat32);
4585 __ ucomiss(out, op2);
4586 }
4587
4588 __ j(Condition::kParityEven, &nan);
4589
4590 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4591 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4592
4593 // Handle 0.0/-0.0.
4594 if (is_min) {
4595 if (type == DataType::Type::kFloat64) {
4596 __ orpd(out, op2);
4597 } else {
4598 __ orps(out, op2);
4599 }
4600 } else {
4601 if (type == DataType::Type::kFloat64) {
4602 __ andpd(out, op2);
4603 } else {
4604 __ andps(out, op2);
4605 }
4606 }
4607 __ jmp(&done);
4608
4609 // NaN handling.
4610 __ Bind(&nan);
4611 if (type == DataType::Type::kFloat64) {
4612 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4613 } else {
4614 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4615 }
4616 __ jmp(&done);
4617
4618 // out := op2;
4619 __ Bind(&op2_label);
4620 if (type == DataType::Type::kFloat64) {
4621 __ movsd(out, op2);
4622 } else {
4623 __ movss(out, op2);
4624 }
4625
4626 // Done.
4627 __ Bind(&done);
4628 }
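// Illustrative note (comment only): the two special cases above exist because ucomiss/ucomisd
// cannot distinguish them. +0.0 and -0.0 compare equal, yet min(+0.0, -0.0) must be -0.0 and
// max(+0.0, -0.0) must be +0.0: OR-ing the raw bit patterns sets the sign bit if either operand
// is negative (yielding -0.0 for min), while AND-ing clears it unless both are (yielding +0.0 for
// max). Any comparison involving a NaN is unordered (PF set), in which case the result is the
// canonical quiet NaN pattern loaded above (0x7FC00000 for float, 0x7FF8000000000000 for double).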
4629
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4630 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4631 DataType::Type type = minmax->GetResultType();
4632 switch (type) {
4633 case DataType::Type::kInt32:
4634 case DataType::Type::kInt64:
4635 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4636 break;
4637 case DataType::Type::kFloat32:
4638 case DataType::Type::kFloat64:
4639 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4640 break;
4641 default:
4642 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4643 }
4644 }
4645
VisitMin(HMin * min)4646 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4647 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4648 }
4649
VisitMin(HMin * min)4650 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4651 GenerateMinMax(min, /*is_min*/ true);
4652 }
4653
VisitMax(HMax * max)4654 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4655 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4656 }
4657
VisitMax(HMax * max)4658 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4659 GenerateMinMax(max, /*is_min*/ false);
4660 }
4661
VisitAbs(HAbs * abs)4662 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4663 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4664 switch (abs->GetResultType()) {
4665 case DataType::Type::kInt32:
4666 case DataType::Type::kInt64:
4667 locations->SetInAt(0, Location::RequiresRegister());
4668 locations->SetOut(Location::SameAsFirstInput());
4669 locations->AddTemp(Location::RequiresRegister());
4670 break;
4671 case DataType::Type::kFloat32:
4672 case DataType::Type::kFloat64:
4673 locations->SetInAt(0, Location::RequiresFpuRegister());
4674 locations->SetOut(Location::SameAsFirstInput());
4675 locations->AddTemp(Location::RequiresFpuRegister());
4676 break;
4677 default:
4678 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4679 }
4680 }
4681
VisitAbs(HAbs * abs)4682 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4683 LocationSummary* locations = abs->GetLocations();
4684 switch (abs->GetResultType()) {
4685 case DataType::Type::kInt32: {
4686 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4687 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4688 // Create mask.
4689 __ movl(mask, out);
4690 __ sarl(mask, Immediate(31));
4691 // Add mask.
4692 __ addl(out, mask);
4693 __ xorl(out, mask);
4694 break;
4695 }
4696 case DataType::Type::kInt64: {
4697 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4698 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4699 // Create mask.
4700 __ movq(mask, out);
4701 __ sarq(mask, Immediate(63));
4702 // Add mask.
4703 __ addq(out, mask);
4704 __ xorq(out, mask);
4705 break;
4706 }
4707 case DataType::Type::kFloat32: {
4708 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4709 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4710 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4711 __ andps(out, mask);
4712 break;
4713 }
4714 case DataType::Type::kFloat64: {
4715 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4716 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4717 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4718 __ andpd(out, mask);
4719 break;
4720 }
4721 default:
4722 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4723 }
4724 }
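// Illustrative sketch (comment only) of the branch-free integer abs above, as a hypothetical
// helper (unsigned arithmetic is used internally to keep the wrap-around well defined in C++):
//
//   int32_t AbsInt32(int32_t x) {
//     uint32_t mask = static_cast<uint32_t>(x >> 31);  // 0 for x >= 0, 0xFFFFFFFF for x < 0
//     return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
//   }
//
// e.g. x == -5: mask == 0xFFFFFFFF, (-5 + mask) == -6 (signed view), -6 ^ mask == 5; INT32_MIN
// maps to itself, matching Math.abs. The floating-point cases simply clear the sign bit by
// AND-ing with 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF.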
4725
VisitDivZeroCheck(HDivZeroCheck * instruction)4726 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4727 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4728 locations->SetInAt(0, Location::Any());
4729 }
4730
VisitDivZeroCheck(HDivZeroCheck * instruction)4731 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4732 SlowPathCode* slow_path =
4733 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4734 codegen_->AddSlowPath(slow_path);
4735
4736 LocationSummary* locations = instruction->GetLocations();
4737 Location value = locations->InAt(0);
4738
4739 switch (instruction->GetType()) {
4740 case DataType::Type::kBool:
4741 case DataType::Type::kUint8:
4742 case DataType::Type::kInt8:
4743 case DataType::Type::kUint16:
4744 case DataType::Type::kInt16:
4745 case DataType::Type::kInt32: {
4746 if (value.IsRegister()) {
4747 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4748 __ j(kEqual, slow_path->GetEntryLabel());
4749 } else if (value.IsStackSlot()) {
4750 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4751 __ j(kEqual, slow_path->GetEntryLabel());
4752 } else {
4753 DCHECK(value.IsConstant()) << value;
4754 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4755 __ jmp(slow_path->GetEntryLabel());
4756 }
4757 }
4758 break;
4759 }
4760 case DataType::Type::kInt64: {
4761 if (value.IsRegister()) {
4762 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4763 __ j(kEqual, slow_path->GetEntryLabel());
4764 } else if (value.IsDoubleStackSlot()) {
4765 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4766 __ j(kEqual, slow_path->GetEntryLabel());
4767 } else {
4768 DCHECK(value.IsConstant()) << value;
4769 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4770 __ jmp(slow_path->GetEntryLabel());
4771 }
4772 }
4773 break;
4774 }
4775 default:
4776 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4777 }
4778 }
4779
HandleShift(HBinaryOperation * op)4780 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4781 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4782
4783 LocationSummary* locations =
4784 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4785
4786 switch (op->GetResultType()) {
4787 case DataType::Type::kInt32:
4788 case DataType::Type::kInt64: {
4789 locations->SetInAt(0, Location::RequiresRegister());
4790 // The shift count needs to be in CL.
4791 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4792 locations->SetOut(Location::SameAsFirstInput());
4793 break;
4794 }
4795 default:
4796 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4797 }
4798 }
4799
HandleShift(HBinaryOperation * op)4800 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4801 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4802
4803 LocationSummary* locations = op->GetLocations();
4804 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4805 Location second = locations->InAt(1);
4806
4807 switch (op->GetResultType()) {
4808 case DataType::Type::kInt32: {
4809 if (second.IsRegister()) {
4810 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4811 if (op->IsShl()) {
4812 __ shll(first_reg, second_reg);
4813 } else if (op->IsShr()) {
4814 __ sarl(first_reg, second_reg);
4815 } else {
4816 __ shrl(first_reg, second_reg);
4817 }
4818 } else {
4819 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4820 if (op->IsShl()) {
4821 __ shll(first_reg, imm);
4822 } else if (op->IsShr()) {
4823 __ sarl(first_reg, imm);
4824 } else {
4825 __ shrl(first_reg, imm);
4826 }
4827 }
4828 break;
4829 }
4830 case DataType::Type::kInt64: {
4831 if (second.IsRegister()) {
4832 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4833 if (op->IsShl()) {
4834 __ shlq(first_reg, second_reg);
4835 } else if (op->IsShr()) {
4836 __ sarq(first_reg, second_reg);
4837 } else {
4838 __ shrq(first_reg, second_reg);
4839 }
4840 } else {
4841 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4842 if (op->IsShl()) {
4843 __ shlq(first_reg, imm);
4844 } else if (op->IsShr()) {
4845 __ sarq(first_reg, imm);
4846 } else {
4847 __ shrq(first_reg, imm);
4848 }
4849 }
4850 break;
4851 }
4852 default:
4853 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4854 UNREACHABLE();
4855 }
4856 }
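// Illustrative note (comment only): constant shift counts are masked above with
// kMaxIntShiftDistance (31) / kMaxLongShiftDistance (63). This matches both the language rule
// that only the low 5 (or 6) bits of a shift count are used and the hardware behaviour of
// shl/sar/shr when the count comes from CL, so register and constant counts agree, e.g. for a
// 32-bit value (x << 33) == (x << 1).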
4857
VisitRor(HRor * ror)4858 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4859 LocationSummary* locations =
4860 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4861
4862 switch (ror->GetResultType()) {
4863 case DataType::Type::kInt32:
4864 case DataType::Type::kInt64: {
4865 locations->SetInAt(0, Location::RequiresRegister());
4866 // The shift count needs to be in CL (unless it is a constant).
4867 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4868 locations->SetOut(Location::SameAsFirstInput());
4869 break;
4870 }
4871 default:
4872 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4873 UNREACHABLE();
4874 }
4875 }
4876
VisitRor(HRor * ror)4877 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4878 LocationSummary* locations = ror->GetLocations();
4879 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4880 Location second = locations->InAt(1);
4881
4882 switch (ror->GetResultType()) {
4883 case DataType::Type::kInt32:
4884 if (second.IsRegister()) {
4885 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4886 __ rorl(first_reg, second_reg);
4887 } else {
4888 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4889 __ rorl(first_reg, imm);
4890 }
4891 break;
4892 case DataType::Type::kInt64:
4893 if (second.IsRegister()) {
4894 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4895 __ rorq(first_reg, second_reg);
4896 } else {
4897 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4898 __ rorq(first_reg, imm);
4899 }
4900 break;
4901 default:
4902 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4903 UNREACHABLE();
4904 }
4905 }
4906
VisitShl(HShl * shl)4907 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4908 HandleShift(shl);
4909 }
4910
VisitShl(HShl * shl)4911 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4912 HandleShift(shl);
4913 }
4914
VisitShr(HShr * shr)4915 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4916 HandleShift(shr);
4917 }
4918
VisitShr(HShr * shr)4919 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4920 HandleShift(shr);
4921 }
4922
VisitUShr(HUShr * ushr)4923 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4924 HandleShift(ushr);
4925 }
4926
VisitUShr(HUShr * ushr)4927 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4928 HandleShift(ushr);
4929 }
4930
VisitNewInstance(HNewInstance * instruction)4931 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4932 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4933 instruction, LocationSummary::kCallOnMainOnly);
4934 InvokeRuntimeCallingConvention calling_convention;
4935 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4936 locations->SetOut(Location::RegisterLocation(RAX));
4937 }
4938
VisitNewInstance(HNewInstance * instruction)4939 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4940 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4941 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4942 DCHECK(!codegen_->IsLeafMethod());
4943 }
4944
VisitNewArray(HNewArray * instruction)4945 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4946 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4947 instruction, LocationSummary::kCallOnMainOnly);
4948 InvokeRuntimeCallingConvention calling_convention;
4949 locations->SetOut(Location::RegisterLocation(RAX));
4950 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4951 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4952 }
4953
VisitNewArray(HNewArray * instruction)4954 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4955 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4956 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4957 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4958 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4959 DCHECK(!codegen_->IsLeafMethod());
4960 }
4961
VisitParameterValue(HParameterValue * instruction)4962 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4963 LocationSummary* locations =
4964 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4965 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4966 if (location.IsStackSlot()) {
4967 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4968 } else if (location.IsDoubleStackSlot()) {
4969 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4970 }
4971 locations->SetOut(location);
4972 }
4973
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4974 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4975 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4976 // Nothing to do, the parameter is already at its location.
4977 }
4978
VisitCurrentMethod(HCurrentMethod * instruction)4979 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4980 LocationSummary* locations =
4981 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4982 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4983 }
4984
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4985 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4986 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4987 // Nothing to do, the method is already at its location.
4988 }
4989
VisitClassTableGet(HClassTableGet * instruction)4990 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4991 LocationSummary* locations =
4992 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4993 locations->SetInAt(0, Location::RequiresRegister());
4994 locations->SetOut(Location::RequiresRegister());
4995 }
4996
VisitClassTableGet(HClassTableGet * instruction)4997 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4998 LocationSummary* locations = instruction->GetLocations();
4999 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5000 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5001 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5002 __ movq(locations->Out().AsRegister<CpuRegister>(),
5003 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5004 } else {
5005 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5006 instruction->GetIndex(), kX86_64PointerSize));
5007 __ movq(locations->Out().AsRegister<CpuRegister>(),
5008 Address(locations->InAt(0).AsRegister<CpuRegister>(),
5009 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5010 __ movq(locations->Out().AsRegister<CpuRegister>(),
5011 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5012 }
5013 }
5014
VisitNot(HNot * not_)5015 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5016 LocationSummary* locations =
5017 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5018 locations->SetInAt(0, Location::RequiresRegister());
5019 locations->SetOut(Location::SameAsFirstInput());
5020 }
5021
VisitNot(HNot * not_)5022 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5023 LocationSummary* locations = not_->GetLocations();
5024 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5025 locations->Out().AsRegister<CpuRegister>().AsRegister());
5026 Location out = locations->Out();
5027 switch (not_->GetResultType()) {
5028 case DataType::Type::kInt32:
5029 __ notl(out.AsRegister<CpuRegister>());
5030 break;
5031
5032 case DataType::Type::kInt64:
5033 __ notq(out.AsRegister<CpuRegister>());
5034 break;
5035
5036 default:
5037 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5038 }
5039 }
5040
VisitBooleanNot(HBooleanNot * bool_not)5041 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5042 LocationSummary* locations =
5043 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5044 locations->SetInAt(0, Location::RequiresRegister());
5045 locations->SetOut(Location::SameAsFirstInput());
5046 }
5047
VisitBooleanNot(HBooleanNot * bool_not)5048 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5049 LocationSummary* locations = bool_not->GetLocations();
5050 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5051 locations->Out().AsRegister<CpuRegister>().AsRegister());
5052 Location out = locations->Out();
5053 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5054 }
5055
VisitPhi(HPhi * instruction)5056 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5057 LocationSummary* locations =
5058 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5059 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5060 locations->SetInAt(i, Location::Any());
5061 }
5062 locations->SetOut(Location::Any());
5063 }
5064
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)5065 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5066 LOG(FATAL) << "Unimplemented";
5067 }
5068
GenerateMemoryBarrier(MemBarrierKind kind)5069 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5070 /*
5071 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
5072 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5073 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5074 */
5075 switch (kind) {
5076 case MemBarrierKind::kAnyAny: {
5077 MemoryFence();
5078 break;
5079 }
5080 case MemBarrierKind::kAnyStore:
5081 case MemBarrierKind::kLoadAny:
5082 case MemBarrierKind::kStoreStore: {
5083 // nop
5084 break;
5085 }
5086 case MemBarrierKind::kNTStoreStore:
5087 // Non-Temporal Store/Store needs an explicit fence.
5088 MemoryFence(/* non-temporal= */ true);
5089 break;
5090 }
5091 }
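// Illustrative note (comment only): under the x86-64 TSO memory model, loads are not reordered
// with other loads, stores are not reordered with other stores, and stores are not reordered with
// earlier loads; the only reordering the hardware performs is letting a load pass an earlier
// store to a different location. Hence only kAnyAny (StoreLoad) needs an actual fence here, while
// the other kinds merely need to act as compiler scheduling barriers. Non-temporal (streaming)
// stores are weakly ordered and fall outside this guarantee, which is why kNTStoreStore emits an
// explicit fence as well.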
5092
HandleFieldGet(HInstruction * instruction)5093 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5094 DCHECK(instruction->IsInstanceFieldGet() ||
5095 instruction->IsStaticFieldGet() ||
5096 instruction->IsPredicatedInstanceFieldGet());
5097
5098 bool object_field_get_with_read_barrier =
5099 gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5100 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5101 LocationSummary* locations =
5102 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5103 object_field_get_with_read_barrier
5104 ? LocationSummary::kCallOnSlowPath
5105 : LocationSummary::kNoCall);
5106 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5107 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5108 }
5109 // receiver_input
5110 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5111 if (is_predicated) {
5112 if (DataType::IsFloatingPointType(instruction->GetType())) {
5113 locations->SetInAt(0, Location::RequiresFpuRegister());
5114 } else {
5115 locations->SetInAt(0, Location::RequiresRegister());
5116 }
5117 }
5118 if (DataType::IsFloatingPointType(instruction->GetType())) {
5119 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5120 : Location::RequiresFpuRegister());
5121 } else {
5122 // The output overlaps for an object field get when read barriers are
5123 // enabled: we do not want the move to overwrite the object's location, as
5124 // we need it to emit the read barrier. For predicated instructions we can
5125 // always overlap since the output is SameAsFirstInput and thus already holds the default value.
5126 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5127 object_field_get_with_read_barrier || is_predicated
5128 ? Location::kOutputOverlap
5129 : Location::kNoOutputOverlap);
5130 }
5131 }
5132
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5133 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5134 const FieldInfo& field_info) {
5135 DCHECK(instruction->IsInstanceFieldGet() ||
5136 instruction->IsStaticFieldGet() ||
5137 instruction->IsPredicatedInstanceFieldGet());
5138
5139 LocationSummary* locations = instruction->GetLocations();
5140 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5141 CpuRegister base = base_loc.AsRegister<CpuRegister>();
5142 Location out = locations->Out();
5143 bool is_volatile = field_info.IsVolatile();
5144 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5145 DataType::Type load_type = instruction->GetType();
5146 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5147
5148 if (load_type == DataType::Type::kReference) {
5149 // /* HeapReference<Object> */ out = *(base + offset)
5150 if (gUseReadBarrier && kUseBakerReadBarrier) {
5151 // Note that a potential implicit null check is handled in this
5152 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5153 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5154 instruction, out, base, offset, /* needs_null_check= */ true);
5155 if (is_volatile) {
5156 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5157 }
5158 } else {
5159 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5160 codegen_->MaybeRecordImplicitNullCheck(instruction);
5161 if (is_volatile) {
5162 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5163 }
5164 // If read barriers are enabled, emit read barriers other than
5165 // Baker's using a slow path (and also unpoison the loaded
5166 // reference, if heap poisoning is enabled).
5167 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5168 }
5169 } else {
5170 codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5171 codegen_->MaybeRecordImplicitNullCheck(instruction);
5172 if (is_volatile) {
5173 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5174 }
5175 }
5176 }
5177
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5178 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5179 const FieldInfo& field_info) {
5180 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5181
5182 LocationSummary* locations =
5183 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5184 DataType::Type field_type = field_info.GetFieldType();
5185 bool is_volatile = field_info.IsVolatile();
5186 bool needs_write_barrier =
5187 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5188
5189 locations->SetInAt(0, Location::RequiresRegister());
5190 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5191 if (is_volatile) {
5192 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5193 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5194 } else {
5195 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5196 }
5197 } else {
5198 if (is_volatile) {
5199 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5200 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5201 } else {
5202 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5203 }
5204 }
5205
5206 // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5207 // InstructionCodeGeneratorX86_64::HandleFieldSet.
5208 if (needs_write_barrier) {
5209 // Temporary registers for the write barrier.
5210 locations->AddTemp(Location::RequiresRegister());
5211 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
5212 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5213 // Temporary register for the reference poisoning.
5214 locations->AddTemp(Location::RequiresRegister());
5215 }
5216 }
5217
Bswap(Location value,DataType::Type type,CpuRegister * temp)5218 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5219 DataType::Type type,
5220 CpuRegister* temp) {
5221 switch (type) {
5222 case DataType::Type::kInt16:
5223 // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5224 __ bswapl(value.AsRegister<CpuRegister>());
5225 __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5226 break;
5227 case DataType::Type::kUint16:
5228 // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5229 __ bswapl(value.AsRegister<CpuRegister>());
5230 __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5231 break;
5232 case DataType::Type::kInt32:
5233 case DataType::Type::kUint32:
5234 __ bswapl(value.AsRegister<CpuRegister>());
5235 break;
5236 case DataType::Type::kInt64:
5237 case DataType::Type::kUint64:
5238 __ bswapq(value.AsRegister<CpuRegister>());
5239 break;
5240 case DataType::Type::kFloat32: {
5241 DCHECK_NE(temp, nullptr);
5242 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5243 __ bswapl(*temp);
5244 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5245 break;
5246 }
5247 case DataType::Type::kFloat64: {
5248 DCHECK_NE(temp, nullptr);
5249 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5250 __ bswapq(*temp);
5251 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5252 break;
5253 }
5254 default:
5255 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5256 UNREACHABLE();
5257 }
5258 }
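// Illustrative note (comment only) on the 16-bit cases above: the value lives sign- or
// zero-extended in a 32-bit register, so bswapl moves the two interesting bytes into the upper
// half and the following 16-bit shift brings them back down with the required extension. For
// kInt16, assuming an input register holding 0x00000180 (int16 0x0180):
//
//   bswapl:  0x00000180 -> 0x80010000
//   sarl 16: 0x80010000 -> 0xFFFF8001   // byte-swapped value 0x8001, correctly sign-extended
//
// The kUint16 case uses shrl instead, so the same input would end up as 0x00008001.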
5259
HandleFieldSet(HInstruction * instruction,uint32_t value_index,uint32_t extra_temp_index,DataType::Type field_type,Address field_addr,CpuRegister base,bool is_volatile,bool is_atomic,bool value_can_be_null,bool byte_swap,WriteBarrierKind write_barrier_kind)5260 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5261 uint32_t value_index,
5262 uint32_t extra_temp_index,
5263 DataType::Type field_type,
5264 Address field_addr,
5265 CpuRegister base,
5266 bool is_volatile,
5267 bool is_atomic,
5268 bool value_can_be_null,
5269 bool byte_swap,
5270 WriteBarrierKind write_barrier_kind) {
5271 LocationSummary* locations = instruction->GetLocations();
5272 Location value = locations->InAt(value_index);
5273
5274 if (is_volatile) {
5275 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5276 }
5277
5278 bool maybe_record_implicit_null_check_done = false;
5279
5280 if (value.IsConstant()) {
5281 switch (field_type) {
5282 case DataType::Type::kBool:
5283 case DataType::Type::kUint8:
5284 case DataType::Type::kInt8:
5285 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5286 break;
5287 case DataType::Type::kUint16:
5288 case DataType::Type::kInt16: {
5289 int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5290 if (byte_swap) {
5291 v = BSWAP(v);
5292 }
5293 __ movw(field_addr, Immediate(v));
5294 break;
5295 }
5296 case DataType::Type::kUint32:
5297 case DataType::Type::kInt32:
5298 case DataType::Type::kFloat32:
5299 case DataType::Type::kReference: {
5300 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5301 if (byte_swap) {
5302 v = BSWAP(v);
5303 }
5304 DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5305 // Note: if heap poisoning is enabled, no need to poison
5306 // (negate) `v` if it is a reference, as it would be null.
5307 __ movl(field_addr, Immediate(v));
5308 break;
5309 }
5310 case DataType::Type::kUint64:
5311 case DataType::Type::kInt64:
5312 case DataType::Type::kFloat64: {
5313 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5314 if (byte_swap) {
5315 v = BSWAP(v);
5316 }
5317 if (is_atomic) {
5318 // Move constant into a register, then atomically store the register to memory.
5319 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5320 __ movq(temp, Immediate(v));
5321 __ movq(field_addr, temp);
5322 } else {
5323 Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5324 codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5325 }
5326 maybe_record_implicit_null_check_done = true;
5327 break;
5328 }
5329 case DataType::Type::kVoid:
5330 LOG(FATAL) << "Unreachable type " << field_type;
5331 UNREACHABLE();
5332 }
5333 } else {
5334 if (byte_swap) {
5335 // Swap byte order in-place in the input register (we will restore it later).
5336 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5337 Bswap(value, field_type, &temp);
5338 }
5339
5340 switch (field_type) {
5341 case DataType::Type::kBool:
5342 case DataType::Type::kUint8:
5343 case DataType::Type::kInt8:
5344 __ movb(field_addr, value.AsRegister<CpuRegister>());
5345 break;
5346 case DataType::Type::kUint16:
5347 case DataType::Type::kInt16:
5348 __ movw(field_addr, value.AsRegister<CpuRegister>());
5349 break;
5350 case DataType::Type::kUint32:
5351 case DataType::Type::kInt32:
5352 case DataType::Type::kReference:
5353 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5354 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5355 __ movl(temp, value.AsRegister<CpuRegister>());
5356 __ PoisonHeapReference(temp);
5357 __ movl(field_addr, temp);
5358 } else {
5359 __ movl(field_addr, value.AsRegister<CpuRegister>());
5360 }
5361 break;
5362 case DataType::Type::kUint64:
5363 case DataType::Type::kInt64:
5364 __ movq(field_addr, value.AsRegister<CpuRegister>());
5365 break;
5366 case DataType::Type::kFloat32:
5367 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5368 break;
5369 case DataType::Type::kFloat64:
5370 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5371 break;
5372 case DataType::Type::kVoid:
5373 LOG(FATAL) << "Unreachable type " << field_type;
5374 UNREACHABLE();
5375 }
5376
5377 if (byte_swap) {
5378 // Restore byte order.
5379 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5380 Bswap(value, field_type, &temp);
5381 }
5382 }
5383
5384 if (!maybe_record_implicit_null_check_done) {
5385 codegen_->MaybeRecordImplicitNullCheck(instruction);
5386 }
5387
5388 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) &&
5389 write_barrier_kind != WriteBarrierKind::kDontEmit) {
5390 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5391 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5392 codegen_->MarkGCCard(
5393 temp,
5394 card,
5395 base,
5396 value.AsRegister<CpuRegister>(),
5397 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
5398 }
5399
5400 if (is_volatile) {
5401 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5402 }
5403 }
5404
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null,WriteBarrierKind write_barrier_kind)5405 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5406 const FieldInfo& field_info,
5407 bool value_can_be_null,
5408 WriteBarrierKind write_barrier_kind) {
5409 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5410
5411 LocationSummary* locations = instruction->GetLocations();
5412 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5413 bool is_volatile = field_info.IsVolatile();
5414 DataType::Type field_type = field_info.GetFieldType();
5415 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5416 bool is_predicated =
5417 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5418
5419 NearLabel pred_is_null;
5420 if (is_predicated) {
5421 __ testl(base, base);
5422 __ j(kZero, &pred_is_null);
5423 }
5424
5425 HandleFieldSet(instruction,
5426 /*value_index=*/ 1,
5427 /*extra_temp_index=*/ 1,
5428 field_type,
5429 Address(base, offset),
5430 base,
5431 is_volatile,
5432 /*is_atomic=*/ false,
5433 value_can_be_null,
5434 /*byte_swap=*/ false,
5435 write_barrier_kind);
5436
5437 if (is_predicated) {
5438 __ Bind(&pred_is_null);
5439 }
5440 }
5441
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5442 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5443 HandleFieldSet(instruction, instruction->GetFieldInfo());
5444 }
5445
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5446 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5447 HandleFieldSet(instruction,
5448 instruction->GetFieldInfo(),
5449 instruction->GetValueCanBeNull(),
5450 instruction->GetWriteBarrierKind());
5451 }
5452
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)5453 void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
5454 HPredicatedInstanceFieldGet* instruction) {
5455 HandleFieldGet(instruction);
5456 }
5457
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5458 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5459 HandleFieldGet(instruction);
5460 }
5461
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)5462 void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
5463 HPredicatedInstanceFieldGet* instruction) {
5464 NearLabel finish;
5465 LocationSummary* locations = instruction->GetLocations();
5466 CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
5467 __ testl(target, target);
5468 __ j(kZero, &finish);
5469 HandleFieldGet(instruction, instruction->GetFieldInfo());
5470 __ Bind(&finish);
5471 }
5472
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5473 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5474 HandleFieldGet(instruction, instruction->GetFieldInfo());
5475 }
5476
VisitStaticFieldGet(HStaticFieldGet * instruction)5477 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5478 HandleFieldGet(instruction);
5479 }
5480
VisitStaticFieldGet(HStaticFieldGet * instruction)5481 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5482 HandleFieldGet(instruction, instruction->GetFieldInfo());
5483 }
5484
VisitStaticFieldSet(HStaticFieldSet * instruction)5485 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5486 HandleFieldSet(instruction, instruction->GetFieldInfo());
5487 }
5488
VisitStaticFieldSet(HStaticFieldSet * instruction)5489 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5490 HandleFieldSet(instruction,
5491 instruction->GetFieldInfo(),
5492 instruction->GetValueCanBeNull(),
5493 instruction->GetWriteBarrierKind());
5494 }
5495
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5496 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5497 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5498 }
5499
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5500 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5501 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5502 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5503 }
5504
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5505 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5506 HUnresolvedInstanceFieldGet* instruction) {
5507 FieldAccessCallingConventionX86_64 calling_convention;
5508 codegen_->CreateUnresolvedFieldLocationSummary(
5509 instruction, instruction->GetFieldType(), calling_convention);
5510 }
5511
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5512 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5513 HUnresolvedInstanceFieldGet* instruction) {
5514 FieldAccessCallingConventionX86_64 calling_convention;
5515 codegen_->GenerateUnresolvedFieldAccess(instruction,
5516 instruction->GetFieldType(),
5517 instruction->GetFieldIndex(),
5518 instruction->GetDexPc(),
5519 calling_convention);
5520 }
5521
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5522 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5523 HUnresolvedInstanceFieldSet* instruction) {
5524 FieldAccessCallingConventionX86_64 calling_convention;
5525 codegen_->CreateUnresolvedFieldLocationSummary(
5526 instruction, instruction->GetFieldType(), calling_convention);
5527 }
5528
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5529 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5530 HUnresolvedInstanceFieldSet* instruction) {
5531 FieldAccessCallingConventionX86_64 calling_convention;
5532 codegen_->GenerateUnresolvedFieldAccess(instruction,
5533 instruction->GetFieldType(),
5534 instruction->GetFieldIndex(),
5535 instruction->GetDexPc(),
5536 calling_convention);
5537 }
5538
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5539 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5540 HUnresolvedStaticFieldGet* instruction) {
5541 FieldAccessCallingConventionX86_64 calling_convention;
5542 codegen_->CreateUnresolvedFieldLocationSummary(
5543 instruction, instruction->GetFieldType(), calling_convention);
5544 }
5545
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5546 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5547 HUnresolvedStaticFieldGet* instruction) {
5548 FieldAccessCallingConventionX86_64 calling_convention;
5549 codegen_->GenerateUnresolvedFieldAccess(instruction,
5550 instruction->GetFieldType(),
5551 instruction->GetFieldIndex(),
5552 instruction->GetDexPc(),
5553 calling_convention);
5554 }
5555
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5556 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5557 HUnresolvedStaticFieldSet* instruction) {
5558 FieldAccessCallingConventionX86_64 calling_convention;
5559 codegen_->CreateUnresolvedFieldLocationSummary(
5560 instruction, instruction->GetFieldType(), calling_convention);
5561 }
5562
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5563 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5564 HUnresolvedStaticFieldSet* instruction) {
5565 FieldAccessCallingConventionX86_64 calling_convention;
5566 codegen_->GenerateUnresolvedFieldAccess(instruction,
5567 instruction->GetFieldType(),
5568 instruction->GetFieldIndex(),
5569 instruction->GetDexPc(),
5570 calling_convention);
5571 }
5572
5573 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5574 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5575 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5576 ? Location::RequiresRegister()
5577 : Location::Any();
5578 locations->SetInAt(0, loc);
5579 }
5580
5581 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5582 if (CanMoveNullCheckToUser(instruction)) {
5583 return;
5584 }
5585 LocationSummary* locations = instruction->GetLocations();
5586 Location obj = locations->InAt(0);
5587
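  // Implicit null check: load from offset 0 of the object. If the object is null the load
  // faults, and the fault handler uses the PC recorded below to raise the
  // NullPointerException, so no explicit compare-and-branch is needed on the fast path.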
5588 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5589 RecordPcInfo(instruction, instruction->GetDexPc());
5590 }
5591
5592 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5593 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5594 AddSlowPath(slow_path);
5595
5596 LocationSummary* locations = instruction->GetLocations();
5597 Location obj = locations->InAt(0);
5598
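  // The object may live in a register, in a stack slot, or be a known null constant;
  // only the constant case jumps unconditionally to the slow path.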
5599 if (obj.IsRegister()) {
5600 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5601 } else if (obj.IsStackSlot()) {
5602 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5603 } else {
5604 DCHECK(obj.IsConstant()) << obj;
5605 DCHECK(obj.GetConstant()->IsNullConstant());
5606 __ jmp(slow_path->GetEntryLabel());
5607 return;
5608 }
5609 __ j(kEqual, slow_path->GetEntryLabel());
5610 }
5611
5612 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5613 codegen_->GenerateNullCheck(instruction);
5614 }
5615
5616 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5617 bool object_array_get_with_read_barrier =
5618 gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5619 LocationSummary* locations =
5620 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5621 object_array_get_with_read_barrier
5622 ? LocationSummary::kCallOnSlowPath
5623 : LocationSummary::kNoCall);
5624 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5625 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5626 }
5627 locations->SetInAt(0, Location::RequiresRegister());
5628 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5629 if (DataType::IsFloatingPointType(instruction->GetType())) {
5630 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5631 } else {
5632 // The output overlaps for an object array get when read barriers
5633 // are enabled: we do not want the move to overwrite the array's
5634 // location, as we need it to emit the read barrier.
5635 locations->SetOut(
5636 Location::RequiresRegister(),
5637 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5638 }
5639 }
5640
5641 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5642 LocationSummary* locations = instruction->GetLocations();
5643 Location obj_loc = locations->InAt(0);
5644 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5645 Location index = locations->InAt(1);
5646 Location out_loc = locations->Out();
5647 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5648
5649 DataType::Type type = instruction->GetType();
5650 if (type == DataType::Type::kReference) {
5651 static_assert(
5652 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5653 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5654 // /* HeapReference<Object> */ out =
5655 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5656 if (gUseReadBarrier && kUseBakerReadBarrier) {
5657 // Note that a potential implicit null check is handled in this
5658 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5659 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5660 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5661 } else {
5662 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5663 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5664 codegen_->MaybeRecordImplicitNullCheck(instruction);
5665 // If read barriers are enabled, emit read barriers other than
5666 // Baker's using a slow path (and also unpoison the loaded
5667 // reference, if heap poisoning is enabled).
5668 if (index.IsConstant()) {
5669 uint32_t offset =
5670 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5671 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5672 } else {
5673 codegen_->MaybeGenerateReadBarrierSlow(
5674 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5675 }
5676 }
5677 } else {
5678 if (type == DataType::Type::kUint16
5679 && mirror::kUseStringCompression
5680 && instruction->IsStringCharAt()) {
5681 // Branch into the compressed and uncompressed cases based on the string's compression flag.
5682 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5683 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5684 NearLabel done, not_compressed;
5685 __ testb(Address(obj, count_offset), Immediate(1));
5686 codegen_->MaybeRecordImplicitNullCheck(instruction);
5687 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5688 "Expecting 0=compressed, 1=uncompressed");
5689 __ j(kNotZero, &not_compressed);
5690 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5691 __ jmp(&done);
5692 __ Bind(&not_compressed);
5693 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5694 __ Bind(&done);
5695 } else {
5696 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5697 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5698 codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5699 }
5700 codegen_->MaybeRecordImplicitNullCheck(instruction);
5701 }
5702 }
5703
5704 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5705 DataType::Type value_type = instruction->GetComponentType();
5706
5707 bool needs_write_barrier =
5708 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5709 bool needs_type_check = instruction->NeedsTypeCheck();
5710
5711 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5712 instruction,
5713 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5714
5715 locations->SetInAt(0, Location::RequiresRegister());
5716 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5717 if (DataType::IsFloatingPointType(value_type)) {
5718 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5719 } else {
5720 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5721 }
5722
5723 if (needs_write_barrier) {
5724 // Used by reference poisoning or emitting write barrier.
5725 locations->AddTemp(Location::RequiresRegister());
5726 if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
5727 // Only used when emitting a write barrier.
5728 locations->AddTemp(Location::RequiresRegister());
5729 }
5730 }
5731 }
5732
5733 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5734 LocationSummary* locations = instruction->GetLocations();
5735 Location array_loc = locations->InAt(0);
5736 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5737 Location index = locations->InAt(1);
5738 Location value = locations->InAt(2);
5739 DataType::Type value_type = instruction->GetComponentType();
5740 bool needs_type_check = instruction->NeedsTypeCheck();
5741 bool needs_write_barrier =
5742 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5743
5744 switch (value_type) {
5745 case DataType::Type::kBool:
5746 case DataType::Type::kUint8:
5747 case DataType::Type::kInt8: {
5748 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5749 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5750 if (value.IsRegister()) {
5751 __ movb(address, value.AsRegister<CpuRegister>());
5752 } else {
5753 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5754 }
5755 codegen_->MaybeRecordImplicitNullCheck(instruction);
5756 break;
5757 }
5758
5759 case DataType::Type::kUint16:
5760 case DataType::Type::kInt16: {
5761 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5762 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5763 if (value.IsRegister()) {
5764 __ movw(address, value.AsRegister<CpuRegister>());
5765 } else {
5766 DCHECK(value.IsConstant()) << value;
5767 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5768 }
5769 codegen_->MaybeRecordImplicitNullCheck(instruction);
5770 break;
5771 }
5772
5773 case DataType::Type::kReference: {
5774 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5775 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5776
5777 if (!value.IsRegister()) {
5778 // Just setting null.
5779 DCHECK(instruction->InputAt(2)->IsNullConstant());
5780 DCHECK(value.IsConstant()) << value;
5781 __ movl(address, Immediate(0));
5782 codegen_->MaybeRecordImplicitNullCheck(instruction);
5783 DCHECK(!needs_write_barrier);
5784 DCHECK(!needs_type_check);
5785 break;
5786 }
5787
5788 DCHECK(needs_write_barrier);
5789 CpuRegister register_value = value.AsRegister<CpuRegister>();
5790 Location temp_loc = locations->GetTemp(0);
5791 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5792
5793 bool can_value_be_null = instruction->GetValueCanBeNull();
5794 NearLabel do_store;
5795 if (can_value_be_null) {
5796 __ testl(register_value, register_value);
5797 __ j(kEqual, &do_store);
5798 }
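      // A null value needs neither the type check nor the card mark below,
      // so the branch above jumps straight to the store.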
5799
5800 SlowPathCode* slow_path = nullptr;
5801 if (needs_type_check) {
5802 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5803 codegen_->AddSlowPath(slow_path);
5804
5805 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5806 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5807 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5808
5809 // Note that when Baker read barriers are enabled, the type
5810 // checks are performed without read barriers. This is fine,
5811 // even in the case where a class object is in the from-space
5812 // after the flip, as a comparison involving such a type would
5813 // not produce a false positive; it may of course produce a
5814 // false negative, in which case we would take the ArraySet
5815 // slow path.
5816
5817 // /* HeapReference<Class> */ temp = array->klass_
5818 __ movl(temp, Address(array, class_offset));
5819 codegen_->MaybeRecordImplicitNullCheck(instruction);
5820 __ MaybeUnpoisonHeapReference(temp);
5821
5822 // /* HeapReference<Class> */ temp = temp->component_type_
5823 __ movl(temp, Address(temp, component_offset));
5824 // If heap poisoning is enabled, no need to unpoison `temp`
5825 // nor the object reference in `register_value->klass`, as
5826 // we are comparing two poisoned references.
5827 __ cmpl(temp, Address(register_value, class_offset));
5828
5829 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5830 NearLabel do_put;
5831 __ j(kEqual, &do_put);
5832 // If heap poisoning is enabled, the `temp` reference has
5833 // not been unpoisoned yet; unpoison it now.
5834 __ MaybeUnpoisonHeapReference(temp);
5835
5836 // If heap poisoning is enabled, no need to unpoison the
5837 // heap reference loaded below, as it is only used for a
5838 // comparison with null.
5839 __ cmpl(Address(temp, super_offset), Immediate(0));
5840 __ j(kNotEqual, slow_path->GetEntryLabel());
5841 __ Bind(&do_put);
5842 } else {
5843 __ j(kNotEqual, slow_path->GetEntryLabel());
5844 }
5845 }
5846
5847 if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
5848 DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
5849 << " Already null checked so we shouldn't do it again.";
5850 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5851 codegen_->MarkGCCard(temp,
5852 card,
5853 array,
5854 value.AsRegister<CpuRegister>(),
5855 /* emit_null_check= */ false);
5856 }
5857
5858 if (can_value_be_null) {
5859 DCHECK(do_store.IsLinked());
5860 __ Bind(&do_store);
5861 }
5862
5863 Location source = value;
5864 if (kPoisonHeapReferences) {
5865 __ movl(temp, register_value);
5866 __ PoisonHeapReference(temp);
5867 source = temp_loc;
5868 }
5869
5870 __ movl(address, source.AsRegister<CpuRegister>());
5871
5872 if (can_value_be_null || !needs_type_check) {
5873 codegen_->MaybeRecordImplicitNullCheck(instruction);
5874 }
5875
5876 if (slow_path != nullptr) {
5877 __ Bind(slow_path->GetExitLabel());
5878 }
5879
5880 break;
5881 }
5882
5883 case DataType::Type::kInt32: {
5884 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5885 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5886 if (value.IsRegister()) {
5887 __ movl(address, value.AsRegister<CpuRegister>());
5888 } else {
5889 DCHECK(value.IsConstant()) << value;
5890 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5891 __ movl(address, Immediate(v));
5892 }
5893 codegen_->MaybeRecordImplicitNullCheck(instruction);
5894 break;
5895 }
5896
5897 case DataType::Type::kInt64: {
5898 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5899 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5900 if (value.IsRegister()) {
5901 __ movq(address, value.AsRegister<CpuRegister>());
5902 codegen_->MaybeRecordImplicitNullCheck(instruction);
5903 } else {
5904 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5905 Address address_high =
5906 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5907 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5908 }
5909 break;
5910 }
5911
5912 case DataType::Type::kFloat32: {
5913 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5914 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5915 if (value.IsFpuRegister()) {
5916 __ movss(address, value.AsFpuRegister<XmmRegister>());
5917 } else {
5918 DCHECK(value.IsConstant());
5919 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5920 __ movl(address, Immediate(v));
5921 }
5922 codegen_->MaybeRecordImplicitNullCheck(instruction);
5923 break;
5924 }
5925
5926 case DataType::Type::kFloat64: {
5927 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5928 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5929 if (value.IsFpuRegister()) {
5930 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5931 codegen_->MaybeRecordImplicitNullCheck(instruction);
5932 } else {
5933 int64_t v =
5934 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5935 Address address_high =
5936 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5937 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5938 }
5939 break;
5940 }
5941
5942 case DataType::Type::kUint32:
5943 case DataType::Type::kUint64:
5944 case DataType::Type::kVoid:
5945 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5946 UNREACHABLE();
5947 }
5948 }
5949
5950 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5951 LocationSummary* locations =
5952 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5953 locations->SetInAt(0, Location::RequiresRegister());
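  // A length that is emitted at its use site (e.g. folded into a BoundsCheck) needs no
  // output register of its own.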
5954 if (!instruction->IsEmittedAtUseSite()) {
5955 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5956 }
5957 }
5958
5959 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5960 if (instruction->IsEmittedAtUseSite()) {
5961 return;
5962 }
5963
5964 LocationSummary* locations = instruction->GetLocations();
5965 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5966 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5967 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5968 __ movl(out, Address(obj, offset));
5969 codegen_->MaybeRecordImplicitNullCheck(instruction);
5970 // Mask out most significant bit in case the array is String's array of char.
5971 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5972 __ shrl(out, Immediate(1));
5973 }
5974 }
5975
5976 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5977 RegisterSet caller_saves = RegisterSet::Empty();
5978 InvokeRuntimeCallingConvention calling_convention;
5979 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5980 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5981 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5982 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5983 HInstruction* length = instruction->InputAt(1);
5984 if (!length->IsEmittedAtUseSite()) {
5985 locations->SetInAt(1, Location::RegisterOrConstant(length));
5986 }
5987 }
5988
5989 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5990 LocationSummary* locations = instruction->GetLocations();
5991 Location index_loc = locations->InAt(0);
5992 Location length_loc = locations->InAt(1);
5993 SlowPathCode* slow_path =
5994 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5995
5996 if (length_loc.IsConstant()) {
5997 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5998 if (index_loc.IsConstant()) {
5999 // BCE will remove the bounds check if we are guaranteed to pass.
6000 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6001 if (index < 0 || index >= length) {
6002 codegen_->AddSlowPath(slow_path);
6003 __ jmp(slow_path->GetEntryLabel());
6004 } else {
6005 // Some optimization after BCE may have generated this, and we should not
6006 // generate a bounds check if it is a valid range.
6007 }
6008 return;
6009 }
6010
6011 // The length is the constant, so it must be the CMP immediate: compare the index against it and use the reversed (kAboveEqual) condition.
6012 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6013 __ cmpl(index_reg, Immediate(length));
6014 codegen_->AddSlowPath(slow_path);
6015 __ j(kAboveEqual, slow_path->GetEntryLabel());
6016 } else {
6017 HInstruction* array_length = instruction->InputAt(1);
6018 if (array_length->IsEmittedAtUseSite()) {
6019 // Address the length field in the array.
6020 DCHECK(array_length->IsArrayLength());
6021 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6022 Location array_loc = array_length->GetLocations()->InAt(0);
6023 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6024 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6025 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6026 // the string compression flag) with the in-memory length and avoid the temporary.
6027 CpuRegister length_reg = CpuRegister(TMP);
6028 __ movl(length_reg, array_len);
6029 codegen_->MaybeRecordImplicitNullCheck(array_length);
6030 __ shrl(length_reg, Immediate(1));
6031 codegen_->GenerateIntCompare(length_reg, index_loc);
6032 } else {
6033 // Check the bound for the general case:
6034 // an array of char, or a String's char array when the compression feature is off.
6035 if (index_loc.IsConstant()) {
6036 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6037 __ cmpl(array_len, Immediate(value));
6038 } else {
6039 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6040 }
6041 codegen_->MaybeRecordImplicitNullCheck(array_length);
6042 }
6043 } else {
6044 codegen_->GenerateIntCompare(length_loc, index_loc);
6045 }
6046 codegen_->AddSlowPath(slow_path);
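    // Unsigned length <= index means the index is out of bounds; a negative index
    // appears as a large unsigned value and is caught by the same comparison.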
6047 __ j(kBelowEqual, slow_path->GetEntryLabel());
6048 }
6049 }
6050
6051 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
6052 CpuRegister card,
6053 CpuRegister object,
6054 CpuRegister value,
6055 bool emit_null_check) {
6056 NearLabel is_null;
6057 if (emit_null_check) {
6058 __ testl(value, value);
6059 __ j(kEqual, &is_null);
6060 }
6061 // Load the address of the card table into `card`.
6062 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6063 /* no_rip= */ true));
6064 // Calculate the offset (in the card table) of the card corresponding to
6065 // `object`.
6066 __ movq(temp, object);
6067 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6068 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6069 // `object`'s card.
6070 //
6071 // Register `card` contains the address of the card table. Note that the card
6072 // table's base is biased during its creation so that it always starts at an
6073 // address whose least-significant byte is equal to `kCardDirty` (see
6074 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6075 // below writes the `kCardDirty` (byte) value into the `object`'s card
6076 // (located at `card + object >> kCardShift`).
6077 //
6078 // This dual use of the value in register `card` (1. to calculate the location
6079 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6080 // (no need to explicitly load `kCardDirty` as an immediate value).
6081 __ movb(Address(temp, card, TIMES_1, 0), card);
6082 if (emit_null_check) {
6083 __ Bind(&is_null);
6084 }
6085 }
6086
6087 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6088 LOG(FATAL) << "Unimplemented";
6089 }
6090
6091 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6092 if (instruction->GetNext()->IsSuspendCheck() &&
6093 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6094 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6095 // The back edge will generate the suspend check.
6096 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6097 }
6098
6099 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6100 }
6101
6102 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6103 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6104 instruction, LocationSummary::kCallOnSlowPath);
6105 // In suspend check slow path, usually there are no caller-save registers at all.
6106 // If SIMD instructions are present, however, we force spilling all live SIMD
6107 // registers in full width (since the runtime only saves/restores lower part).
6108 locations->SetCustomSlowPathCallerSaves(
6109 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6110 }
6111
6112 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6113 HBasicBlock* block = instruction->GetBlock();
6114 if (block->GetLoopInformation() != nullptr) {
6115 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6116 // The back edge will generate the suspend check.
6117 return;
6118 }
6119 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6120 // The goto will generate the suspend check.
6121 return;
6122 }
6123 GenerateSuspendCheck(instruction, nullptr);
6124 }
6125
6126 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6127 HBasicBlock* successor) {
6128 SuspendCheckSlowPathX86_64* slow_path =
6129 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6130 if (slow_path == nullptr) {
6131 slow_path =
6132 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6133 instruction->SetSlowPath(slow_path);
6134 codegen_->AddSlowPath(slow_path);
6135 if (successor != nullptr) {
6136 DCHECK(successor->IsLoopHeader());
6137 }
6138 } else {
6139 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6140 }
6141
6142 __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6143 /* no_rip= */ true),
6144 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
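  // The thread flags are read through the GS-based Thread pointer; if any suspend or
  // checkpoint request bit is set, control transfers to the suspend-check slow path.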
6145 if (successor == nullptr) {
6146 __ j(kNotZero, slow_path->GetEntryLabel());
6147 __ Bind(slow_path->GetReturnLabel());
6148 } else {
6149 __ j(kZero, codegen_->GetLabelOf(successor));
6150 __ jmp(slow_path->GetEntryLabel());
6151 }
6152 }
6153
6154 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6155 return codegen_->GetAssembler();
6156 }
6157
6158 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6159 MoveOperands* move = moves_[index];
6160 Location source = move->GetSource();
6161 Location destination = move->GetDestination();
6162
6163 if (source.IsRegister()) {
6164 if (destination.IsRegister()) {
6165 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6166 } else if (destination.IsStackSlot()) {
6167 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6168 source.AsRegister<CpuRegister>());
6169 } else {
6170 DCHECK(destination.IsDoubleStackSlot());
6171 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6172 source.AsRegister<CpuRegister>());
6173 }
6174 } else if (source.IsStackSlot()) {
6175 if (destination.IsRegister()) {
6176 __ movl(destination.AsRegister<CpuRegister>(),
6177 Address(CpuRegister(RSP), source.GetStackIndex()));
6178 } else if (destination.IsFpuRegister()) {
6179 __ movss(destination.AsFpuRegister<XmmRegister>(),
6180 Address(CpuRegister(RSP), source.GetStackIndex()));
6181 } else {
6182 DCHECK(destination.IsStackSlot());
6183 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6184 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6185 }
6186 } else if (source.IsDoubleStackSlot()) {
6187 if (destination.IsRegister()) {
6188 __ movq(destination.AsRegister<CpuRegister>(),
6189 Address(CpuRegister(RSP), source.GetStackIndex()));
6190 } else if (destination.IsFpuRegister()) {
6191 __ movsd(destination.AsFpuRegister<XmmRegister>(),
6192 Address(CpuRegister(RSP), source.GetStackIndex()));
6193 } else {
6194 DCHECK(destination.IsDoubleStackSlot()) << destination;
6195 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6196 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6197 }
6198 } else if (source.IsSIMDStackSlot()) {
6199 if (destination.IsFpuRegister()) {
6200 __ movups(destination.AsFpuRegister<XmmRegister>(),
6201 Address(CpuRegister(RSP), source.GetStackIndex()));
6202 } else {
6203 DCHECK(destination.IsSIMDStackSlot());
6204 size_t high = kX86_64WordSize;
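      // Copy the 16-byte SIMD slot as two 8-byte halves through the TMP register.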
6205 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6206 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6207 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6208 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6209 }
6210 } else if (source.IsConstant()) {
6211 HConstant* constant = source.GetConstant();
6212 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6213 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6214 if (destination.IsRegister()) {
6215 if (value == 0) {
6216 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6217 } else {
6218 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6219 }
6220 } else {
6221 DCHECK(destination.IsStackSlot()) << destination;
6222 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6223 }
6224 } else if (constant->IsLongConstant()) {
6225 int64_t value = constant->AsLongConstant()->GetValue();
6226 if (destination.IsRegister()) {
6227 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6228 } else {
6229 DCHECK(destination.IsDoubleStackSlot()) << destination;
6230 codegen_->Store64BitValueToStack(destination, value);
6231 }
6232 } else if (constant->IsFloatConstant()) {
6233 float fp_value = constant->AsFloatConstant()->GetValue();
6234 if (destination.IsFpuRegister()) {
6235 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6236 codegen_->Load32BitValue(dest, fp_value);
6237 } else {
6238 DCHECK(destination.IsStackSlot()) << destination;
6239 Immediate imm(bit_cast<int32_t, float>(fp_value));
6240 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6241 }
6242 } else {
6243 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6244 double fp_value = constant->AsDoubleConstant()->GetValue();
6245 int64_t value = bit_cast<int64_t, double>(fp_value);
6246 if (destination.IsFpuRegister()) {
6247 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6248 codegen_->Load64BitValue(dest, fp_value);
6249 } else {
6250 DCHECK(destination.IsDoubleStackSlot()) << destination;
6251 codegen_->Store64BitValueToStack(destination, value);
6252 }
6253 }
6254 } else if (source.IsFpuRegister()) {
6255 if (destination.IsFpuRegister()) {
6256 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6257 } else if (destination.IsStackSlot()) {
6258 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6259 source.AsFpuRegister<XmmRegister>());
6260 } else if (destination.IsDoubleStackSlot()) {
6261 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6262 source.AsFpuRegister<XmmRegister>());
6263 } else {
6264 DCHECK(destination.IsSIMDStackSlot());
6265 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6266 source.AsFpuRegister<XmmRegister>());
6267 }
6268 }
6269 }
6270
6271 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6272 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6273 __ movl(Address(CpuRegister(RSP), mem), reg);
6274 __ movl(reg, CpuRegister(TMP));
6275 }
6276
6277 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6278 __ movq(CpuRegister(TMP), reg1);
6279 __ movq(reg1, reg2);
6280 __ movq(reg2, CpuRegister(TMP));
6281 }
6282
6283 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6284 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6285 __ movq(Address(CpuRegister(RSP), mem), reg);
6286 __ movq(reg, CpuRegister(TMP));
6287 }
6288
6289 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6290 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6291 __ movss(Address(CpuRegister(RSP), mem), reg);
6292 __ movd(reg, CpuRegister(TMP));
6293 }
6294
6295 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6296 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6297 __ movsd(Address(CpuRegister(RSP), mem), reg);
6298 __ movd(reg, CpuRegister(TMP));
6299 }
6300
6301 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
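  // Spill the XMM register to a fresh 16-byte area below RSP, swap that area with the
  // memory operand qword by qword, then reload the register and release the area.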
6302 size_t extra_slot = 2 * kX86_64WordSize;
6303 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6304 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6305 ExchangeMemory64(0, mem + extra_slot, 2);
6306 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6307 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6308 }
6309
6310 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6311 ScratchRegisterScope ensure_scratch(
6312 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6313
6314 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
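  // If the scratch register had to be spilled, the push moved RSP down by one word,
  // so the original stack offsets are adjusted accordingly.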
6315 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6316 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6317 Address(CpuRegister(RSP), mem2 + stack_offset));
6318 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6319 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6320 CpuRegister(ensure_scratch.GetRegister()));
6321 }
6322
6323 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6324 ScratchRegisterScope ensure_scratch(
6325 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6326
6327 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6328
6329 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6330 for (int i = 0; i < num_of_qwords; i++) {
6331 __ movq(CpuRegister(TMP),
6332 Address(CpuRegister(RSP), mem1 + stack_offset));
6333 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6334 Address(CpuRegister(RSP), mem2 + stack_offset));
6335 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6336 CpuRegister(TMP));
6337 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6338 CpuRegister(ensure_scratch.GetRegister()));
6339 stack_offset += kX86_64WordSize;
6340 }
6341 }
6342
6343 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6344 MoveOperands* move = moves_[index];
6345 Location source = move->GetSource();
6346 Location destination = move->GetDestination();
6347
6348 if (source.IsRegister() && destination.IsRegister()) {
6349 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6350 } else if (source.IsRegister() && destination.IsStackSlot()) {
6351 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6352 } else if (source.IsStackSlot() && destination.IsRegister()) {
6353 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6354 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6355 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6356 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6357 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6358 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6359 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6360 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6361 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6362 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6363 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6364 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6365 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6366 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6367 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6368 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6369 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6370 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6371 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6372 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6373 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6374 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6375 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6376 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6377 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6378 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6379 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6380 } else {
6381 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6382 }
6383 }
6384
6385
6386 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6387 __ pushq(CpuRegister(reg));
6388 }
6389
6390
6391 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6392 __ popq(CpuRegister(reg));
6393 }
6394
6395 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6396 SlowPathCode* slow_path, CpuRegister class_reg) {
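  // A status byte below the shifted visibly-initialized value means the class is not yet
  // (visibly) initialized; the slow path initializes it or waits for the initializing thread.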
6397 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6398 __ j(kBelow, slow_path->GetEntryLabel());
6399 __ Bind(slow_path->GetExitLabel());
6400 }
6401
6402 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6403 CpuRegister temp) {
6404 uint32_t path_to_root = check->GetBitstringPathToRoot();
6405 uint32_t mask = check->GetBitstringMask();
6406 DCHECK(IsPowerOfTwo(mask + 1));
6407 size_t mask_bits = WhichPowerOf2(mask + 1);
6408
6409 if (mask_bits == 16u) {
6410 // Compare the bitstring in memory.
6411 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6412 } else {
6413 // /* uint32_t */ temp = temp->status_
6414 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6415 // Compare the bitstring bits using SUB.
6416 __ subl(temp, Immediate(path_to_root));
6417 // Shift out bits that do not contribute to the comparison.
6418 __ shll(temp, Immediate(32u - mask_bits));
6419 }
6420 }
6421
6422 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6423 HLoadClass::LoadKind desired_class_load_kind) {
6424 switch (desired_class_load_kind) {
6425 case HLoadClass::LoadKind::kInvalid:
6426 LOG(FATAL) << "UNREACHABLE";
6427 UNREACHABLE();
6428 case HLoadClass::LoadKind::kReferrersClass:
6429 break;
6430 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6431 case HLoadClass::LoadKind::kBootImageRelRo:
6432 case HLoadClass::LoadKind::kBssEntry:
6433 case HLoadClass::LoadKind::kBssEntryPublic:
6434 case HLoadClass::LoadKind::kBssEntryPackage:
6435 DCHECK(!GetCompilerOptions().IsJitCompiler());
6436 break;
6437 case HLoadClass::LoadKind::kJitBootImageAddress:
6438 case HLoadClass::LoadKind::kJitTableAddress:
6439 DCHECK(GetCompilerOptions().IsJitCompiler());
6440 break;
6441 case HLoadClass::LoadKind::kRuntimeCall:
6442 break;
6443 }
6444 return desired_class_load_kind;
6445 }
6446
6447 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6448 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6449 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6450 // Custom calling convention: RAX serves as both input and output.
6451 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6452 cls,
6453 Location::RegisterLocation(RAX),
6454 Location::RegisterLocation(RAX));
6455 return;
6456 }
6457 DCHECK_EQ(cls->NeedsAccessCheck(),
6458 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6459 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6460
6461 const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
6462 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6463 ? LocationSummary::kCallOnSlowPath
6464 : LocationSummary::kNoCall;
6465 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6466 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6467 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6468 }
6469
6470 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6471 locations->SetInAt(0, Location::RequiresRegister());
6472 }
6473 locations->SetOut(Location::RequiresRegister());
6474 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6475 if (!gUseReadBarrier || kUseBakerReadBarrier) {
6476 // Rely on the type resolution and/or initialization to save everything.
6477 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6478 } else {
6479 // For non-Baker read barrier we have a temp-clobbering call.
6480 }
6481 }
6482 }
6483
6484 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6485 dex::TypeIndex type_index,
6486 Handle<mirror::Class> handle) {
6487 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6488 // Add a patch entry and return the label.
6489 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6490 PatchInfo<Label>* info = &jit_class_patches_.back();
6491 return &info->label;
6492 }
6493
6494 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6495 // move.
6496 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6497 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6498 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6499 codegen_->GenerateLoadClassRuntimeCall(cls);
6500 return;
6501 }
6502 DCHECK_EQ(cls->NeedsAccessCheck(),
6503 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6504 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6505
6506 LocationSummary* locations = cls->GetLocations();
6507 Location out_loc = locations->Out();
6508 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6509
6510 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6511 ? kWithoutReadBarrier
6512 : gCompilerReadBarrierOption;
6513 bool generate_null_check = false;
6514 switch (load_kind) {
6515 case HLoadClass::LoadKind::kReferrersClass: {
6516 DCHECK(!cls->CanCallRuntime());
6517 DCHECK(!cls->MustGenerateClinitCheck());
6518 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6519 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6520 GenerateGcRootFieldLoad(
6521 cls,
6522 out_loc,
6523 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6524 /* fixup_label= */ nullptr,
6525 read_barrier_option);
6526 break;
6527 }
6528 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6529 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6530 codegen_->GetCompilerOptions().IsBootImageExtension());
6531 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6532 __ leal(out,
6533 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6534 codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6535 break;
6536 case HLoadClass::LoadKind::kBootImageRelRo: {
6537 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6538 __ movl(out,
6539 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6540 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6541 break;
6542 }
6543 case HLoadClass::LoadKind::kBssEntry:
6544 case HLoadClass::LoadKind::kBssEntryPublic:
6545 case HLoadClass::LoadKind::kBssEntryPackage: {
6546 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6547 /* no_rip= */ false);
6548 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6549 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6550 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6551 // No need for memory fence, thanks to the x86-64 memory model.
6552 generate_null_check = true;
6553 break;
6554 }
6555 case HLoadClass::LoadKind::kJitBootImageAddress: {
6556 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6557 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6558 DCHECK_NE(address, 0u);
6559 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6560 break;
6561 }
6562 case HLoadClass::LoadKind::kJitTableAddress: {
6563 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6564 /* no_rip= */ true);
6565 Label* fixup_label =
6566 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6567 // /* GcRoot<mirror::Class> */ out = *address
6568 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6569 break;
6570 }
6571 default:
6572 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6573 UNREACHABLE();
6574 }
6575
6576 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6577 DCHECK(cls->CanCallRuntime());
6578 SlowPathCode* slow_path =
6579 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6580 codegen_->AddSlowPath(slow_path);
6581 if (generate_null_check) {
6582 __ testl(out, out);
6583 __ j(kEqual, slow_path->GetEntryLabel());
6584 }
6585 if (cls->MustGenerateClinitCheck()) {
6586 GenerateClassInitializationCheck(slow_path, out);
6587 } else {
6588 __ Bind(slow_path->GetExitLabel());
6589 }
6590 }
6591 }
6592
6593 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6594 LocationSummary* locations =
6595 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6596 locations->SetInAt(0, Location::RequiresRegister());
6597 if (check->HasUses()) {
6598 locations->SetOut(Location::SameAsFirstInput());
6599 }
6600 // Rely on the type initialization to save everything we need.
6601 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6602 }
6603
6604 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6605 // Custom calling convention: RAX serves as both input and output.
6606 Location location = Location::RegisterLocation(RAX);
6607 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6608 }
6609
6610 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6611 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6612 }
6613
6614 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6615 // Custom calling convention: RAX serves as both input and output.
6616 Location location = Location::RegisterLocation(RAX);
6617 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6618 }
6619
6620 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6621 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6622 }
6623
6624 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6625 // We assume the class is not null.
6626 SlowPathCode* slow_path =
6627 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6628 codegen_->AddSlowPath(slow_path);
6629 GenerateClassInitializationCheck(slow_path,
6630 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6631 }
6632
6633 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6634 HLoadString::LoadKind desired_string_load_kind) {
6635 switch (desired_string_load_kind) {
6636 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6637 case HLoadString::LoadKind::kBootImageRelRo:
6638 case HLoadString::LoadKind::kBssEntry:
6639 DCHECK(!GetCompilerOptions().IsJitCompiler());
6640 break;
6641 case HLoadString::LoadKind::kJitBootImageAddress:
6642 case HLoadString::LoadKind::kJitTableAddress:
6643 DCHECK(GetCompilerOptions().IsJitCompiler());
6644 break;
6645 case HLoadString::LoadKind::kRuntimeCall:
6646 break;
6647 }
6648 return desired_string_load_kind;
6649 }
6650
6651 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6652 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6653 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6654 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6655 locations->SetOut(Location::RegisterLocation(RAX));
6656 } else {
6657 locations->SetOut(Location::RequiresRegister());
6658 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6659 if (!gUseReadBarrier || kUseBakerReadBarrier) {
6660 // Rely on the pResolveString to save everything.
6661 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6662 } else {
6663 // For non-Baker read barrier we have a temp-clobbering call.
6664 }
6665 }
6666 }
6667 }
6668
6669 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6670 dex::StringIndex string_index,
6671 Handle<mirror::String> handle) {
6672 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6673 // Add a patch entry and return the label.
6674 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6675 PatchInfo<Label>* info = &jit_string_patches_.back();
6676 return &info->label;
6677 }
6678
6679 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6680 // move.
6681 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6682 LocationSummary* locations = load->GetLocations();
6683 Location out_loc = locations->Out();
6684 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6685
6686 switch (load->GetLoadKind()) {
6687 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6688 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6689 codegen_->GetCompilerOptions().IsBootImageExtension());
6690 __ leal(out,
6691 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6692 codegen_->RecordBootImageStringPatch(load);
6693 return;
6694 }
6695 case HLoadString::LoadKind::kBootImageRelRo: {
6696 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6697 __ movl(out,
6698 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6699 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6700 return;
6701 }
6702 case HLoadString::LoadKind::kBssEntry: {
6703 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6704 /* no_rip= */ false);
6705 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6706 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6707 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
6708 // No need for memory fence, thanks to the x86-64 memory model.
6709 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6710 codegen_->AddSlowPath(slow_path);
6711 __ testl(out, out);
6712 __ j(kEqual, slow_path->GetEntryLabel());
6713 __ Bind(slow_path->GetExitLabel());
6714 return;
6715 }
6716 case HLoadString::LoadKind::kJitBootImageAddress: {
6717 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6718 DCHECK_NE(address, 0u);
6719 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6720 return;
6721 }
6722 case HLoadString::LoadKind::kJitTableAddress: {
6723 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6724 /* no_rip= */ true);
6725 Label* fixup_label = codegen_->NewJitRootStringPatch(
6726 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6727 // /* GcRoot<mirror::String> */ out = *address
6728 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
6729 return;
6730 }
6731 default:
6732 break;
6733 }
6734
6735 // TODO: Re-add the compiler code to do string dex cache lookup again.
6736 // Custom calling convention: RAX serves as both input and output.
6737 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6738 codegen_->InvokeRuntime(kQuickResolveString,
6739 load,
6740 load->GetDexPc());
6741 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6742 }
6743
6744 static Address GetExceptionTlsAddress() {
6745 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6746 /* no_rip= */ true);
6747 }
6748
6749 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6750 LocationSummary* locations =
6751 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6752 locations->SetOut(Location::RequiresRegister());
6753 }
6754
6755 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6756 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6757 }
6758
6759 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6760 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6761 }
6762
6763 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6764 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6765 }
6766
6767 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6768 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6769 instruction, LocationSummary::kCallOnMainOnly);
6770 InvokeRuntimeCallingConvention calling_convention;
6771 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6772 }
6773
6774 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6775 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6776 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6777 }
6778
6779 // Temp is used for read barrier.
6780 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6781 if (gUseReadBarrier &&
6782 !kUseBakerReadBarrier &&
6783 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6784 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6785 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6786 return 1;
6787 }
6788 return 0;
6789 }
6790
6791 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
6792 // interface pointer; the current interface itself is compared in memory.
6793 // The other checks have one temp for loading the object's class.
6794 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6795 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6796 return 2;
6797 }
6798 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6799 }
6800
6801 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6802 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6803 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6804 bool baker_read_barrier_slow_path = false;
6805 switch (type_check_kind) {
6806 case TypeCheckKind::kExactCheck:
6807 case TypeCheckKind::kAbstractClassCheck:
6808 case TypeCheckKind::kClassHierarchyCheck:
6809 case TypeCheckKind::kArrayObjectCheck: {
6810 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6811 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6812 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6813 break;
6814 }
6815 case TypeCheckKind::kArrayCheck:
6816 case TypeCheckKind::kUnresolvedCheck:
6817 case TypeCheckKind::kInterfaceCheck:
6818 call_kind = LocationSummary::kCallOnSlowPath;
6819 break;
6820 case TypeCheckKind::kBitstringCheck:
6821 break;
6822 }
6823
6824 LocationSummary* locations =
6825 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6826 if (baker_read_barrier_slow_path) {
6827 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6828 }
6829 locations->SetInAt(0, Location::RequiresRegister());
6830 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6831 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
6832 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
6833 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
6834 } else {
6835 locations->SetInAt(1, Location::Any());
6836 }
6837 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6838 locations->SetOut(Location::RequiresRegister());
6839 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6840 }
6841
6842 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6843 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6844 LocationSummary* locations = instruction->GetLocations();
6845 Location obj_loc = locations->InAt(0);
6846 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6847 Location cls = locations->InAt(1);
6848 Location out_loc = locations->Out();
6849 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6850 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6851 DCHECK_LE(num_temps, 1u);
6852 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6853 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6854 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6855 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6856 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6857 SlowPathCode* slow_path = nullptr;
6858 NearLabel done, zero;
6859
6860 // Return 0 if `obj` is null.
6861 // Avoid null check if we know obj is not null.
6862 if (instruction->MustDoNullCheck()) {
6863 __ testl(obj, obj);
6864 __ j(kEqual, &zero);
6865 }
6866
6867 switch (type_check_kind) {
6868 case TypeCheckKind::kExactCheck: {
6869 ReadBarrierOption read_barrier_option =
6870 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6871 // /* HeapReference<Class> */ out = obj->klass_
6872 GenerateReferenceLoadTwoRegisters(instruction,
6873 out_loc,
6874 obj_loc,
6875 class_offset,
6876 read_barrier_option);
6877 if (cls.IsRegister()) {
6878 __ cmpl(out, cls.AsRegister<CpuRegister>());
6879 } else {
6880 DCHECK(cls.IsStackSlot()) << cls;
6881 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6882 }
6883 if (zero.IsLinked()) {
6884 // Classes must be equal for the instanceof to succeed.
6885 __ j(kNotEqual, &zero);
6886 __ movl(out, Immediate(1));
6887 __ jmp(&done);
6888 } else {
6889 __ setcc(kEqual, out);
6890 // setcc only sets the low byte.
6891 __ andl(out, Immediate(1));
6892 }
6893 break;
6894 }
6895
6896 case TypeCheckKind::kAbstractClassCheck: {
6897 ReadBarrierOption read_barrier_option =
6898 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6899 // /* HeapReference<Class> */ out = obj->klass_
6900 GenerateReferenceLoadTwoRegisters(instruction,
6901 out_loc,
6902 obj_loc,
6903 class_offset,
6904 read_barrier_option);
6905 // If the class is abstract, we eagerly fetch the super class of the
6906 // object to avoid doing a comparison we know will fail.
6907 NearLabel loop, success;
6908 __ Bind(&loop);
6909 // /* HeapReference<Class> */ out = out->super_class_
6910 GenerateReferenceLoadOneRegister(instruction,
6911 out_loc,
6912 super_offset,
6913 maybe_temp_loc,
6914 read_barrier_option);
6915 __ testl(out, out);
6916 // If `out` is null, we use it for the result, and jump to `done`.
6917 __ j(kEqual, &done);
6918 if (cls.IsRegister()) {
6919 __ cmpl(out, cls.AsRegister<CpuRegister>());
6920 } else {
6921 DCHECK(cls.IsStackSlot()) << cls;
6922 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6923 }
6924 __ j(kNotEqual, &loop);
6925 __ movl(out, Immediate(1));
6926 if (zero.IsLinked()) {
6927 __ jmp(&done);
6928 }
6929 break;
6930 }
6931
6932 case TypeCheckKind::kClassHierarchyCheck: {
6933 ReadBarrierOption read_barrier_option =
6934 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6935 // /* HeapReference<Class> */ out = obj->klass_
6936 GenerateReferenceLoadTwoRegisters(instruction,
6937 out_loc,
6938 obj_loc,
6939 class_offset,
6940 read_barrier_option);
6941 // Walk over the class hierarchy to find a match.
6942 NearLabel loop, success;
6943 __ Bind(&loop);
6944 if (cls.IsRegister()) {
6945 __ cmpl(out, cls.AsRegister<CpuRegister>());
6946 } else {
6947 DCHECK(cls.IsStackSlot()) << cls;
6948 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6949 }
6950 __ j(kEqual, &success);
6951 // /* HeapReference<Class> */ out = out->super_class_
6952 GenerateReferenceLoadOneRegister(instruction,
6953 out_loc,
6954 super_offset,
6955 maybe_temp_loc,
6956 read_barrier_option);
6957 __ testl(out, out);
6958 __ j(kNotEqual, &loop);
6959 // If `out` is null, we use it for the result, and jump to `done`.
6960 __ jmp(&done);
6961 __ Bind(&success);
6962 __ movl(out, Immediate(1));
6963 if (zero.IsLinked()) {
6964 __ jmp(&done);
6965 }
6966 break;
6967 }
6968
6969 case TypeCheckKind::kArrayObjectCheck: {
6970 ReadBarrierOption read_barrier_option =
6971 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6972 // /* HeapReference<Class> */ out = obj->klass_
6973 GenerateReferenceLoadTwoRegisters(instruction,
6974 out_loc,
6975 obj_loc,
6976 class_offset,
6977 read_barrier_option);
6978 // Do an exact check.
6979 NearLabel exact_check;
6980 if (cls.IsRegister()) {
6981 __ cmpl(out, cls.AsRegister<CpuRegister>());
6982 } else {
6983 DCHECK(cls.IsStackSlot()) << cls;
6984 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6985 }
6986 __ j(kEqual, &exact_check);
6987 // Otherwise, we need to check that the object's class is a non-primitive array.
6988 // /* HeapReference<Class> */ out = out->component_type_
6989 GenerateReferenceLoadOneRegister(instruction,
6990 out_loc,
6991 component_offset,
6992 maybe_temp_loc,
6993 read_barrier_option);
6994 __ testl(out, out);
6995 // If `out` is null, we use it for the result, and jump to `done`.
6996 __ j(kEqual, &done);
6997 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6998 __ j(kNotEqual, &zero);
6999 __ Bind(&exact_check);
7000 __ movl(out, Immediate(1));
7001 __ jmp(&done);
7002 break;
7003 }
7004
7005 case TypeCheckKind::kArrayCheck: {
7006 // No read barrier since the slow path will retry upon failure.
7007 // /* HeapReference<Class> */ out = obj->klass_
7008 GenerateReferenceLoadTwoRegisters(instruction,
7009 out_loc,
7010 obj_loc,
7011 class_offset,
7012 kWithoutReadBarrier);
7013 if (cls.IsRegister()) {
7014 __ cmpl(out, cls.AsRegister<CpuRegister>());
7015 } else {
7016 DCHECK(cls.IsStackSlot()) << cls;
7017 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7018 }
7019 DCHECK(locations->OnlyCallsOnSlowPath());
7020 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7021 instruction, /* is_fatal= */ false);
7022 codegen_->AddSlowPath(slow_path);
7023 __ j(kNotEqual, slow_path->GetEntryLabel());
7024 __ movl(out, Immediate(1));
7025 if (zero.IsLinked()) {
7026 __ jmp(&done);
7027 }
7028 break;
7029 }
7030
7031 case TypeCheckKind::kUnresolvedCheck:
7032 case TypeCheckKind::kInterfaceCheck: {
7033 // Note that we indeed only call on slow path, but we always go
7034 // into the slow path for the unresolved and interface check
7035 // cases.
7036 //
7037 // We cannot directly call the InstanceofNonTrivial runtime
7038 // entry point without resorting to a type checking slow path
7039 // here (i.e. by calling InvokeRuntime directly), as it would
7040 // require assigning fixed registers for the inputs of this
7041 // HInstanceOf instruction (following the runtime calling
7042 // convention), which might be cluttered by the potential first
7043 // read barrier emission at the beginning of this method.
7044 //
7045 // TODO: Introduce a new runtime entry point taking the object
7046 // to test (instead of its class) as argument, and let it deal
7047 // with the read barrier issues. This will let us refactor this
7048 // case of the `switch` code as it was previously (with a direct
7049 // call to the runtime not using a type checking slow path).
7050 // This should also be beneficial for the other cases above.
7051 DCHECK(locations->OnlyCallsOnSlowPath());
7052 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7053 instruction, /* is_fatal= */ false);
7054 codegen_->AddSlowPath(slow_path);
7055 __ jmp(slow_path->GetEntryLabel());
7056 if (zero.IsLinked()) {
7057 __ jmp(&done);
7058 }
7059 break;
7060 }
7061
7062 case TypeCheckKind::kBitstringCheck: {
7063 // /* HeapReference<Class> */ temp = obj->klass_
7064 GenerateReferenceLoadTwoRegisters(instruction,
7065 out_loc,
7066 obj_loc,
7067 class_offset,
7068 kWithoutReadBarrier);
7069
7070 GenerateBitstringTypeCheckCompare(instruction, out);
7071 if (zero.IsLinked()) {
7072 __ j(kNotEqual, &zero);
7073 __ movl(out, Immediate(1));
7074 __ jmp(&done);
7075 } else {
7076 __ setcc(kEqual, out);
7077 // setcc only sets the low byte.
7078 __ andl(out, Immediate(1));
7079 }
7080 break;
7081 }
7082 }
7083
7084 if (zero.IsLinked()) {
7085 __ Bind(&zero);
7086 __ xorl(out, out);
7087 }
7088
7089 if (done.IsLinked()) {
7090 __ Bind(&done);
7091 }
7092
7093 if (slow_path != nullptr) {
7094 __ Bind(slow_path->GetExitLabel());
7095 }
7096 }
7097
7098 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7099 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7100 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7101 LocationSummary* locations =
7102 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7103 locations->SetInAt(0, Location::RequiresRegister());
7104 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7105 // Require a register for the interface check since there is a loop that compares the class to
7106 // a memory address.
7107 locations->SetInAt(1, Location::RequiresRegister());
7108 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7109 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7110 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7111 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7112 } else {
7113 locations->SetInAt(1, Location::Any());
7114 }
7115 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
7116 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7117 }
7118
7119 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7120 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7121 LocationSummary* locations = instruction->GetLocations();
7122 Location obj_loc = locations->InAt(0);
7123 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7124 Location cls = locations->InAt(1);
7125 Location temp_loc = locations->GetTemp(0);
7126 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7127 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7128 DCHECK_GE(num_temps, 1u);
7129 DCHECK_LE(num_temps, 2u);
7130 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7131 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7132 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7133 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7134 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7135 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7136 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7137 const uint32_t object_array_data_offset =
7138 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7139
7140 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7141 SlowPathCode* type_check_slow_path =
7142 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7143 instruction, is_type_check_slow_path_fatal);
7144 codegen_->AddSlowPath(type_check_slow_path);
7145
7146
7147 NearLabel done;
7148 // Avoid null check if we know obj is not null.
7149 if (instruction->MustDoNullCheck()) {
7150 __ testl(obj, obj);
7151 __ j(kEqual, &done);
7152 }
7153
7154 switch (type_check_kind) {
7155 case TypeCheckKind::kExactCheck:
7156 case TypeCheckKind::kArrayCheck: {
7157 // /* HeapReference<Class> */ temp = obj->klass_
7158 GenerateReferenceLoadTwoRegisters(instruction,
7159 temp_loc,
7160 obj_loc,
7161 class_offset,
7162 kWithoutReadBarrier);
7163 if (cls.IsRegister()) {
7164 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7165 } else {
7166 DCHECK(cls.IsStackSlot()) << cls;
7167 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7168 }
7169 // Jump to slow path for throwing the exception or doing a
7170 // more involved array check.
7171 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7172 break;
7173 }
7174
7175 case TypeCheckKind::kAbstractClassCheck: {
7176 // /* HeapReference<Class> */ temp = obj->klass_
7177 GenerateReferenceLoadTwoRegisters(instruction,
7178 temp_loc,
7179 obj_loc,
7180 class_offset,
7181 kWithoutReadBarrier);
7182 // If the class is abstract, we eagerly fetch the super class of the
7183 // object to avoid doing a comparison we know will fail.
7184 NearLabel loop;
7185 __ Bind(&loop);
7186 // /* HeapReference<Class> */ temp = temp->super_class_
7187 GenerateReferenceLoadOneRegister(instruction,
7188 temp_loc,
7189 super_offset,
7190 maybe_temp2_loc,
7191 kWithoutReadBarrier);
7192
7193 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7194 // exception.
7195 __ testl(temp, temp);
7196 // Otherwise, compare the classes.
7197 __ j(kZero, type_check_slow_path->GetEntryLabel());
7198 if (cls.IsRegister()) {
7199 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7200 } else {
7201 DCHECK(cls.IsStackSlot()) << cls;
7202 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7203 }
7204 __ j(kNotEqual, &loop);
7205 break;
7206 }
7207
7208 case TypeCheckKind::kClassHierarchyCheck: {
7209 // /* HeapReference<Class> */ temp = obj->klass_
7210 GenerateReferenceLoadTwoRegisters(instruction,
7211 temp_loc,
7212 obj_loc,
7213 class_offset,
7214 kWithoutReadBarrier);
7215 // Walk over the class hierarchy to find a match.
7216 NearLabel loop;
7217 __ Bind(&loop);
7218 if (cls.IsRegister()) {
7219 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7220 } else {
7221 DCHECK(cls.IsStackSlot()) << cls;
7222 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7223 }
7224 __ j(kEqual, &done);
7225
7226 // /* HeapReference<Class> */ temp = temp->super_class_
7227 GenerateReferenceLoadOneRegister(instruction,
7228 temp_loc,
7229 super_offset,
7230 maybe_temp2_loc,
7231 kWithoutReadBarrier);
7232
7233 // If the class reference currently in `temp` is not null, jump
7234 // back to the beginning of the loop.
7235 __ testl(temp, temp);
7236 __ j(kNotZero, &loop);
7237 // Otherwise, jump to the slow path to throw the exception.
7238 __ jmp(type_check_slow_path->GetEntryLabel());
7239 break;
7240 }
7241
7242 case TypeCheckKind::kArrayObjectCheck: {
7243 // /* HeapReference<Class> */ temp = obj->klass_
7244 GenerateReferenceLoadTwoRegisters(instruction,
7245 temp_loc,
7246 obj_loc,
7247 class_offset,
7248 kWithoutReadBarrier);
7249 // Do an exact check.
7250 NearLabel check_non_primitive_component_type;
7251 if (cls.IsRegister()) {
7252 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7253 } else {
7254 DCHECK(cls.IsStackSlot()) << cls;
7255 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7256 }
7257 __ j(kEqual, &done);
7258
7259 // Otherwise, we need to check that the object's class is a non-primitive array.
7260 // /* HeapReference<Class> */ temp = temp->component_type_
7261 GenerateReferenceLoadOneRegister(instruction,
7262 temp_loc,
7263 component_offset,
7264 maybe_temp2_loc,
7265 kWithoutReadBarrier);
7266
7267 // If the component type is not null (i.e. the object is indeed
7268 // an array), jump to label `check_non_primitive_component_type`
7269 // to further check that this component type is not a primitive
7270 // type.
7271 __ testl(temp, temp);
7272 // Otherwise, jump to the slow path to throw the exception.
7273 __ j(kZero, type_check_slow_path->GetEntryLabel());
7274 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7275 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7276 break;
7277 }
7278
7279 case TypeCheckKind::kUnresolvedCheck: {
7280 // We always go into the type check slow path for the unresolved case.
7281 //
7282 // We cannot directly call the CheckCast runtime entry point
7283 // without resorting to a type checking slow path here (i.e. by
7284 // calling InvokeRuntime directly), as it would require
7285 // assigning fixed registers for the inputs of this HCheckCast
7286 // instruction (following the runtime calling convention), which
7287 // might be cluttered by the potential first read barrier
7288 // emission at the beginning of this method.
7289 __ jmp(type_check_slow_path->GetEntryLabel());
7290 break;
7291 }
7292
7293 case TypeCheckKind::kInterfaceCheck: {
7294 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7295 // We cannot get false positives by doing this.
7296 // /* HeapReference<Class> */ temp = obj->klass_
7297 GenerateReferenceLoadTwoRegisters(instruction,
7298 temp_loc,
7299 obj_loc,
7300 class_offset,
7301 kWithoutReadBarrier);
7302
7303 // /* HeapReference<Class> */ temp = temp->iftable_
7304 GenerateReferenceLoadTwoRegisters(instruction,
7305 temp_loc,
7306 temp_loc,
7307 iftable_offset,
7308 kWithoutReadBarrier);
7309 // Iftable is never null.
7310 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7311 // Maybe poison the `cls` for direct comparison with memory.
7312 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7313 // Loop through the iftable and check if any class matches.
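// Each iftable entry spans two references (the interface class and its method array), so the
// index is decremented in steps of 2 and the scan runs from the last entry down to index 0.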
7314 NearLabel start_loop;
7315 __ Bind(&start_loop);
7316 // Need to subtract first to handle the empty array case.
7317 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7318 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7319 // Go to next interface if the classes do not match.
7320 __ cmpl(cls.AsRegister<CpuRegister>(),
7321 CodeGeneratorX86_64::ArrayAddress(temp,
7322 maybe_temp2_loc,
7323 TIMES_4,
7324 object_array_data_offset));
7325 __ j(kNotEqual, &start_loop);  // Fall through if the classes match; the cast succeeds.
7326 // If `cls` was poisoned above, unpoison it.
7327 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7328 break;
7329 }
7330
7331 case TypeCheckKind::kBitstringCheck: {
7332 // /* HeapReference<Class> */ temp = obj->klass_
7333 GenerateReferenceLoadTwoRegisters(instruction,
7334 temp_loc,
7335 obj_loc,
7336 class_offset,
7337 kWithoutReadBarrier);
7338
7339 GenerateBitstringTypeCheckCompare(instruction, temp);
7340 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7341 break;
7342 }
7343 }
7344
7345 if (done.IsLinked()) {
7346 __ Bind(&done);
7347 }
7348
7349 __ Bind(type_check_slow_path->GetExitLabel());
7350 }
7351
7352 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7353 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7354 instruction, LocationSummary::kCallOnMainOnly);
7355 InvokeRuntimeCallingConvention calling_convention;
7356 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7357 }
7358
7359 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7360 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7361 instruction,
7362 instruction->GetDexPc());
7363 if (instruction->IsEnter()) {
7364 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7365 } else {
7366 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7367 }
7368 }
7369
7370 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7371 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7372 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7373 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7374 locations->SetInAt(0, Location::RequiresRegister());
7375 // There is no immediate variant of negated bitwise and in X86.
7376 locations->SetInAt(1, Location::RequiresRegister());
7377 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7378 }
7379
7380 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7381 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7382 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7383 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7384 locations->SetInAt(0, Location::RequiresRegister());
7385 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7386 }
7387
7388 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7389 LocationSummary* locations = instruction->GetLocations();
7390 Location first = locations->InAt(0);
7391 Location second = locations->InAt(1);
7392 Location dest = locations->Out();
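// The BMI `andn` below computes dest = ~first & second in a single instruction, avoiding a
// separate NOT of the negated operand.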
7393 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7394 }
7395
7396 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7397 LocationSummary* locations = instruction->GetLocations();
7398 Location src = locations->InAt(0);
7399 Location dest = locations->Out();
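// BMI bit-manipulation forms: `blsr` resets the lowest set bit (dest = src & (src - 1)), while
// `blsmsk` builds a mask up to and including the lowest set bit (dest = src ^ (src - 1)).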
7400 switch (instruction->GetOpKind()) {
7401 case HInstruction::kAnd:
7402 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7403 break;
7404 case HInstruction::kXor:
7405 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7406 break;
7407 default:
7408 LOG(FATAL) << "Unreachable";
7409 }
7410 }
7411
7412 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7413 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7414 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7415
7416 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7417 LocationSummary* locations =
7418 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7419 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7420 || instruction->GetResultType() == DataType::Type::kInt64);
7421 locations->SetInAt(0, Location::RequiresRegister());
7422 locations->SetInAt(1, Location::Any());
7423 locations->SetOut(Location::SameAsFirstInput());
7424 }
7425
7426 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7427 HandleBitwiseOperation(instruction);
7428 }
7429
7430 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7431 HandleBitwiseOperation(instruction);
7432 }
7433
7434 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7435 HandleBitwiseOperation(instruction);
7436 }
7437
7438 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7439 LocationSummary* locations = instruction->GetLocations();
7440 Location first = locations->InAt(0);
7441 Location second = locations->InAt(1);
7442 DCHECK(first.Equals(locations->Out()));
7443
7444 if (instruction->GetResultType() == DataType::Type::kInt32) {
7445 if (second.IsRegister()) {
7446 if (instruction->IsAnd()) {
7447 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7448 } else if (instruction->IsOr()) {
7449 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7450 } else {
7451 DCHECK(instruction->IsXor());
7452 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7453 }
7454 } else if (second.IsConstant()) {
7455 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7456 if (instruction->IsAnd()) {
7457 __ andl(first.AsRegister<CpuRegister>(), imm);
7458 } else if (instruction->IsOr()) {
7459 __ orl(first.AsRegister<CpuRegister>(), imm);
7460 } else {
7461 DCHECK(instruction->IsXor());
7462 __ xorl(first.AsRegister<CpuRegister>(), imm);
7463 }
7464 } else {
7465 Address address(CpuRegister(RSP), second.GetStackIndex());
7466 if (instruction->IsAnd()) {
7467 __ andl(first.AsRegister<CpuRegister>(), address);
7468 } else if (instruction->IsOr()) {
7469 __ orl(first.AsRegister<CpuRegister>(), address);
7470 } else {
7471 DCHECK(instruction->IsXor());
7472 __ xorl(first.AsRegister<CpuRegister>(), address);
7473 }
7474 }
7475 } else {
7476 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7477 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7478 bool second_is_constant = false;
7479 int64_t value = 0;
7480 if (second.IsConstant()) {
7481 second_is_constant = true;
7482 value = second.GetConstant()->AsLongConstant()->GetValue();
7483 }
7484 bool is_int32_value = IsInt<32>(value);
7485
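// x86-64 ALU instructions only accept sign-extended 32-bit immediates; wider constants are
// loaded RIP-relative from the constant area via LiteralInt64Address().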
7486 if (instruction->IsAnd()) {
7487 if (second_is_constant) {
7488 if (is_int32_value) {
7489 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7490 } else {
7491 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7492 }
7493 } else if (second.IsDoubleStackSlot()) {
7494 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7495 } else {
7496 __ andq(first_reg, second.AsRegister<CpuRegister>());
7497 }
7498 } else if (instruction->IsOr()) {
7499 if (second_is_constant) {
7500 if (is_int32_value) {
7501 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7502 } else {
7503 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7504 }
7505 } else if (second.IsDoubleStackSlot()) {
7506 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7507 } else {
7508 __ orq(first_reg, second.AsRegister<CpuRegister>());
7509 }
7510 } else {
7511 DCHECK(instruction->IsXor());
7512 if (second_is_constant) {
7513 if (is_int32_value) {
7514 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7515 } else {
7516 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7517 }
7518 } else if (second.IsDoubleStackSlot()) {
7519 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7520 } else {
7521 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7522 }
7523 }
7524 }
7525 }
7526
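// Generate a heap reference load `out = *(out + offset)`, with a read barrier if requested.
// `maybe_temp` is only needed for the slow-path (non-Baker) read barrier, which must still see
// the original value of `out` after it has been overwritten by the load.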
7527 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7528 HInstruction* instruction,
7529 Location out,
7530 uint32_t offset,
7531 Location maybe_temp,
7532 ReadBarrierOption read_barrier_option) {
7533 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7534 if (read_barrier_option == kWithReadBarrier) {
7535 CHECK(gUseReadBarrier);
7536 if (kUseBakerReadBarrier) {
7537 // Load with fast path based Baker's read barrier.
7538 // /* HeapReference<Object> */ out = *(out + offset)
7539 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7540 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7541 } else {
7542 // Load with slow path based read barrier.
7543 // Save the value of `out` into `maybe_temp` before overwriting it
7544 // in the following move operation, as we will need it for the
7545 // read barrier below.
7546 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7547 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7548 // /* HeapReference<Object> */ out = *(out + offset)
7549 __ movl(out_reg, Address(out_reg, offset));
7550 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7551 }
7552 } else {
7553 // Plain load with no read barrier.
7554 // /* HeapReference<Object> */ out = *(out + offset)
7555 __ movl(out_reg, Address(out_reg, offset));
7556 __ MaybeUnpoisonHeapReference(out_reg);
7557 }
7558 }
7559
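// Generate a heap reference load `out = *(obj + offset)`, with a read barrier if requested.
// Unlike the one-register variant above, the base object register is left intact.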
7560 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7561 HInstruction* instruction,
7562 Location out,
7563 Location obj,
7564 uint32_t offset,
7565 ReadBarrierOption read_barrier_option) {
7566 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7567 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7568 if (read_barrier_option == kWithReadBarrier) {
7569 CHECK(gUseReadBarrier);
7570 if (kUseBakerReadBarrier) {
7571 // Load with fast path based Baker's read barrier.
7572 // /* HeapReference<Object> */ out = *(obj + offset)
7573 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7574 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7575 } else {
7576 // Load with slow path based read barrier.
7577 // /* HeapReference<Object> */ out = *(obj + offset)
7578 __ movl(out_reg, Address(obj_reg, offset));
7579 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7580 }
7581 } else {
7582 // Plain load with no read barrier.
7583 // /* HeapReference<Object> */ out = *(obj + offset)
7584 __ movl(out_reg, Address(obj_reg, offset));
7585 __ MaybeUnpoisonHeapReference(out_reg);
7586 }
7587 }
7588
7589 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7590 HInstruction* instruction,
7591 Location root,
7592 const Address& address,
7593 Label* fixup_label,
7594 ReadBarrierOption read_barrier_option) {
7595 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7596 if (read_barrier_option == kWithReadBarrier) {
7597 DCHECK(gUseReadBarrier);
7598 if (kUseBakerReadBarrier) {
7599 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7600 // Baker's read barriers are used:
7601 //
7602 // root = obj.field;
7603 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7604 // if (temp != null) {
7605 // root = temp(root)
7606 // }
7607
7608 // /* GcRoot<mirror::Object> */ root = *address
7609 __ movl(root_reg, address);
7610 if (fixup_label != nullptr) {
7611 __ Bind(fixup_label);
7612 }
7613 static_assert(
7614 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7615 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7616 "have different sizes.");
7617 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7618 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7619 "have different sizes.");
7620
7621 // Slow path marking the GC root `root`.
7622 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7623 instruction, root, /* unpoison_ref_before_marking= */ false);
7624 codegen_->AddSlowPath(slow_path);
7625
7626 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7627 const int32_t entry_point_offset =
7628 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7629 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7630 // The entrypoint is null when the GC is not marking.
7631 __ j(kNotEqual, slow_path->GetEntryLabel());
7632 __ Bind(slow_path->GetExitLabel());
7633 } else {
7634 // GC root loaded through a slow path for read barriers other
7635 // than Baker's.
7636 // /* GcRoot<mirror::Object>* */ root = address
7637 __ leaq(root_reg, address);
7638 if (fixup_label != nullptr) {
7639 __ Bind(fixup_label);
7640 }
7641 // /* mirror::Object* */ root = root->Read()
7642 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7643 }
7644 } else {
7645 // Plain GC root load with no read barrier.
7646 // /* GcRoot<mirror::Object> */ root = *address
7647 __ movl(root_reg, address);
7648 if (fixup_label != nullptr) {
7649 __ Bind(fixup_label);
7650 }
7651 // Note that GC roots are not affected by heap poisoning, thus we
7652 // do not have to unpoison `root_reg` here.
7653 }
7654 }
7655
7656 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7657 Location ref,
7658 CpuRegister obj,
7659 uint32_t offset,
7660 bool needs_null_check) {
7661 DCHECK(gUseReadBarrier);
7662 DCHECK(kUseBakerReadBarrier);
7663
7664 // /* HeapReference<Object> */ ref = *(obj + offset)
7665 Address src(obj, offset);
7666 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7667 }
7668
7669 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7670 Location ref,
7671 CpuRegister obj,
7672 uint32_t data_offset,
7673 Location index,
7674 bool needs_null_check) {
7675 DCHECK(gUseReadBarrier);
7676 DCHECK(kUseBakerReadBarrier);
7677
7678 static_assert(
7679 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7680 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7681 // /* HeapReference<Object> */ ref =
7682 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7683 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7684 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7685 }
7686
7687 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7688 Location ref,
7689 CpuRegister obj,
7690 const Address& src,
7691 bool needs_null_check,
7692 bool always_update_field,
7693 CpuRegister* temp1,
7694 CpuRegister* temp2) {
7695 DCHECK(gUseReadBarrier);
7696 DCHECK(kUseBakerReadBarrier);
7697
7698 // In slow path based read barriers, the read barrier call is
7699 // inserted after the original load. However, in fast path based
7700 // Baker's read barriers, we need to perform the load of
7701 // mirror::Object::monitor_ *before* the original reference load.
7702 // This load-load ordering is required by the read barrier.
7703 // The fast path/slow path (for Baker's algorithm) should look like:
7704 //
7705 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7706 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7707 // HeapReference<Object> ref = *src; // Original reference load.
7708 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7709 // if (is_gray) {
7710 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7711 // }
7712 //
7713 // Note: the original implementation in ReadBarrier::Barrier is
7714 // slightly more complex as:
7715 // - it implements the load-load fence using a data dependency on
7716 // the high-bits of rb_state, which are expected to be all zeroes
7717 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7718 // here, which is a no-op thanks to the x86-64 memory model);
7719 // - it performs additional checks that we do not do here for
7720 // performance reasons.
7721
7722 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7723 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7724
7725 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7726 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7727 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7728 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7729 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7730 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
7731
7732 // if (rb_state == ReadBarrier::GrayState())
7733 // ref = ReadBarrier::Mark(ref);
7734 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7735 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7736 if (needs_null_check) {
7737 MaybeRecordImplicitNullCheck(instruction);
7738 }
7739
7740 // Load fence to prevent load-load reordering.
7741 // Note that this is a no-op, thanks to the x86-64 memory model.
7742 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7743
7744 // The actual reference load.
7745 // /* HeapReference<Object> */ ref = *src
7746 __ movl(ref_reg, src); // Flags are unaffected.
7747
7748 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7749 // Slow path marking the object `ref` when it is gray.
7750 SlowPathCode* slow_path;
7751 if (always_update_field) {
7752 DCHECK(temp1 != nullptr);
7753 DCHECK(temp2 != nullptr);
7754 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7755 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7756 } else {
7757 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7758 instruction, ref, /* unpoison_ref_before_marking= */ true);
7759 }
7760 AddSlowPath(slow_path);
7761
7762 // We have done the "if" of the gray bit check above, now branch based on the flags.
7763 __ j(kNotZero, slow_path->GetEntryLabel());
7764
7765 // Object* ref = ref_addr->AsMirrorPtr()
7766 __ MaybeUnpoisonHeapReference(ref_reg);
7767
7768 __ Bind(slow_path->GetExitLabel());
7769 }
7770
7771 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7772 Location out,
7773 Location ref,
7774 Location obj,
7775 uint32_t offset,
7776 Location index) {
7777 DCHECK(gUseReadBarrier);
7778
7779 // Insert a slow path based read barrier *after* the reference load.
7780 //
7781 // If heap poisoning is enabled, the unpoisoning of the loaded
7782 // reference will be carried out by the runtime within the slow
7783 // path.
7784 //
7785 // Note that `ref` currently does not get unpoisoned (when heap
7786 // poisoning is enabled), which is alright as the `ref` argument is
7787 // not used by the artReadBarrierSlow entry point.
7788 //
7789 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7790 SlowPathCode* slow_path = new (GetScopedAllocator())
7791 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7792 AddSlowPath(slow_path);
7793
7794 __ jmp(slow_path->GetEntryLabel());
7795 __ Bind(slow_path->GetExitLabel());
7796 }
7797
7798 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7799 Location out,
7800 Location ref,
7801 Location obj,
7802 uint32_t offset,
7803 Location index) {
7804 if (gUseReadBarrier) {
7805 // Baker's read barriers shall be handled by the fast path
7806 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7807 DCHECK(!kUseBakerReadBarrier);
7808 // If heap poisoning is enabled, unpoisoning will be taken care of
7809 // by the runtime within the slow path.
7810 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7811 } else if (kPoisonHeapReferences) {
7812 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7813 }
7814 }
7815
7816 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7817 Location out,
7818 Location root) {
7819 DCHECK(gUseReadBarrier);
7820
7821 // Insert a slow path based read barrier *after* the GC root load.
7822 //
7823 // Note that GC roots are not affected by heap poisoning, so we do
7824 // not need to do anything special for this here.
7825 SlowPathCode* slow_path =
7826 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7827 AddSlowPath(slow_path);
7828
7829 __ jmp(slow_path->GetEntryLabel());
7830 __ Bind(slow_path->GetExitLabel());
7831 }
7832
7833 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7834 // Nothing to do, this should be removed during prepare for register allocator.
7835 LOG(FATAL) << "Unreachable";
7836 }
7837
7838 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7839 // Nothing to do, this should be removed during prepare for register allocator.
7840 LOG(FATAL) << "Unreachable";
7841 }
7842
7843 // Simple implementation of packed switch - generate cascaded compare/jumps.
7844 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7845 LocationSummary* locations =
7846 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7847 locations->SetInAt(0, Location::RequiresRegister());
7848 locations->AddTemp(Location::RequiresRegister());
7849 locations->AddTemp(Location::RequiresRegister());
7850 }
7851
7852 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7853 int32_t lower_bound = switch_instr->GetStartValue();
7854 uint32_t num_entries = switch_instr->GetNumEntries();
7855 LocationSummary* locations = switch_instr->GetLocations();
7856 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7857 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7858 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7859 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7860
7861 // Should we generate smaller inline compare/jumps?
7862 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7863 // Figure out the correct compare values and jump conditions.
7864 // Handle the first compare/branch as a special case because it might
7865 // jump to the default case.
7866 DCHECK_GT(num_entries, 2u);
7867 Condition first_condition;
7868 uint32_t index;
7869 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7870 if (lower_bound != 0) {
7871 first_condition = kLess;
7872 __ cmpl(value_reg_in, Immediate(lower_bound));
7873 __ j(first_condition, codegen_->GetLabelOf(default_block));
7874 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7875
7876 index = 1;
7877 } else {
7878 // Handle all the compare/jumps below.
7879 first_condition = kBelow;
7880 index = 0;
7881 }
7882
7883 // Handle the rest of the compare/jumps.
7884 for (; index + 1 < num_entries; index += 2) {
7885 int32_t compare_to_value = lower_bound + index + 1;
7886 __ cmpl(value_reg_in, Immediate(compare_to_value));
7887 // Jump to successors[index] if value < case_value[index].
7888 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7889 // Jump to successors[index + 1] if value == case_value[index + 1].
7890 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7891 }
7892
7893 if (index != num_entries) {
7894 // There are an odd number of entries. Handle the last one.
7895 DCHECK_EQ(index + 1, num_entries);
7896 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7897 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7898 }
7899
7900 // And the default for any other value.
7901 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7902 __ jmp(codegen_->GetLabelOf(default_block));
7903 }
7904 return;
7905 }
7906
7907 // Remove the bias, if needed.
7908 Register value_reg_out = value_reg_in.AsRegister();
7909 if (lower_bound != 0) {
7910 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7911 value_reg_out = temp_reg.AsRegister();
7912 }
7913 CpuRegister value_reg(value_reg_out);
7914
7915 // Is the value in range?
7916 __ cmpl(value_reg, Immediate(num_entries - 1));
7917 __ j(kAbove, codegen_->GetLabelOf(default_block));
7918
7919 // We are in the range of the table.
7920 // Load the address of the jump table in the constant area.
7921 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7922
7923 // Load the (signed) offset from the jump table.
7924 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7925
7926 // Add the offset to the address of the table base.
7927 __ addq(temp_reg, base_reg);
7928
7929 // And jump.
7930 __ jmp(temp_reg);
7931 }
7932
7933 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7934 ATTRIBUTE_UNUSED) {
7935 LOG(FATAL) << "Unreachable";
7936 }
7937
7938 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7939 ATTRIBUTE_UNUSED) {
7940 LOG(FATAL) << "Unreachable";
7941 }
7942
7943 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7944 if (value == 0) {
7945 __ xorl(dest, dest);
7946 } else {
7947 __ movl(dest, Immediate(value));
7948 }
7949 }
7950
7951 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7952 if (value == 0) {
7953 // Clears upper bits too.
7954 __ xorl(dest, dest);
7955 } else if (IsUint<32>(value)) {
7956 // We can use a 32 bit move, as it will zero-extend and is shorter.
7957 __ movl(dest, Immediate(static_cast<int32_t>(value)));
7958 } else {
7959 __ movq(dest, Immediate(value));
7960 }
7961 }
7962
7963 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7964 if (value == 0) {
7965 __ xorps(dest, dest);
7966 } else {
7967 __ movss(dest, LiteralInt32Address(value));
7968 }
7969 }
7970
7971 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7972 if (value == 0) {
7973 __ xorpd(dest, dest);
7974 } else {
7975 __ movsd(dest, LiteralInt64Address(value));
7976 }
7977 }
7978
7979 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7980 Load32BitValue(dest, bit_cast<int32_t, float>(value));
7981 }
7982
7983 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7984 Load64BitValue(dest, bit_cast<int64_t, double>(value));
7985 }
7986
7987 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7988 if (value == 0) {
7989 __ testl(dest, dest);
7990 } else {
7991 __ cmpl(dest, Immediate(value));
7992 }
7993 }
7994
7995 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7996 if (IsInt<32>(value)) {
7997 if (value == 0) {
7998 __ testq(dest, dest);
7999 } else {
8000 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8001 }
8002 } else {
8003 // Value won't fit in an int.
8004 __ cmpq(dest, LiteralInt64Address(value));
8005 }
8006 }
8007
8008 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8009 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8010 GenerateIntCompare(lhs_reg, rhs);
8011 }
8012
8013 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8014 if (rhs.IsConstant()) {
8015 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8016 Compare32BitValue(lhs, value);
8017 } else if (rhs.IsStackSlot()) {
8018 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8019 } else {
8020 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8021 }
8022 }
8023
8024 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8025 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8026 if (rhs.IsConstant()) {
8027 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8028 Compare64BitValue(lhs_reg, value);
8029 } else if (rhs.IsDoubleStackSlot()) {
8030 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8031 } else {
8032 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8033 }
8034 }
8035
8036 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
8037 Location index,
8038 ScaleFactor scale,
8039 uint32_t data_offset) {
8040 return index.IsConstant() ?
8041 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8042 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
8043 }
8044
8045 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
8046 DCHECK(dest.IsDoubleStackSlot());
8047 if (IsInt<32>(value)) {
8048 // Can move directly as an int32 constant.
8049 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
8050 Immediate(static_cast<int32_t>(value)));
8051 } else {
8052 Load64BitValue(CpuRegister(TMP), value);
8053 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
8054 }
8055 }
8056
8057 /**
8058 * Class to handle late fixup of offsets into constant area.
8059 */
8060 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8061 public:
8062 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8063 : codegen_(&codegen), offset_into_constant_area_(offset) {}
8064
8065 protected:
8066 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8067
8068 CodeGeneratorX86_64* codegen_;
8069
8070 private:
8071 void Process(const MemoryRegion& region, int pos) override {
8072 // Patch the correct offset for the instruction. We use the address of the
8073 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8074 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8075 int32_t relative_position = constant_offset - pos;
8076
8077 // Patch in the right value.
8078 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8079 }
8080
8081 // Location in constant area that the fixup refers to.
8082 size_t offset_into_constant_area_;
8083 };
8084
8085 /**
8086  * Class to handle late fixup of offsets to a jump table that will be created in the
8087 * constant area.
8088 */
8089 class JumpTableRIPFixup : public RIPFixup {
8090 public:
8091 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
8092 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
8093
8094 void CreateJumpTable() {
8095 X86_64Assembler* assembler = codegen_->GetAssembler();
8096
8097 // Ensure that the reference to the jump table has the correct offset.
8098 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8099 SetOffset(offset_in_constant_table);
8100
8101 // Compute the offset from the start of the function to this jump table.
8102 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
8103
8104 // Populate the jump table with the correct values.
8105 int32_t num_entries = switch_instr_->GetNumEntries();
8106 HBasicBlock* block = switch_instr_->GetBlock();
8107 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8108 // The value that we want is the target offset - the position of the table.
8109 for (int32_t i = 0; i < num_entries; i++) {
8110 HBasicBlock* b = successors[i];
8111 Label* l = codegen_->GetLabelOf(b);
8112 DCHECK(l->IsBound());
8113 int32_t offset_to_block = l->Position() - current_table_offset;
8114 assembler->AppendInt32(offset_to_block);
8115 }
8116 }
8117
8118 private:
8119 const HPackedSwitch* switch_instr_;
8120 };
8121
8122 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
8123 // Generate the constant area if needed.
8124 X86_64Assembler* assembler = GetAssembler();
8125 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8126 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8127 assembler->Align(4, 0);
8128 constant_area_start_ = assembler->CodeSize();
8129
8130 // Populate any jump tables.
8131 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8132 jump_table->CreateJumpTable();
8133 }
8134
8135 // And now add the constant area to the generated code.
8136 assembler->AddConstantArea();
8137 }
8138
8139 // And finish up.
8140 CodeGenerator::Finalize(allocator);
8141 }
8142
8143 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8144 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8145 return Address::RIP(fixup);
8146 }
8147
8148 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8149 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8150 return Address::RIP(fixup);
8151 }
8152
8153 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8154 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8155 return Address::RIP(fixup);
8156 }
8157
8158 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8159 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8160 return Address::RIP(fixup);
8161 }
8162
8163 // TODO: trg as memory.
8164 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8165 if (!trg.IsValid()) {
8166 DCHECK_EQ(type, DataType::Type::kVoid);
8167 return;
8168 }
8169
8170 DCHECK_NE(type, DataType::Type::kVoid);
8171
8172 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8173 if (trg.Equals(return_loc)) {
8174 return;
8175 }
8176
8177 // Let the parallel move resolver take care of all of this.
8178 HParallelMove parallel_move(GetGraph()->GetAllocator());
8179 parallel_move.AddMove(return_loc, trg, type, nullptr);
8180 GetMoveResolver()->EmitNativeCode(¶llel_move);
8181 }

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup that is used both to create the jump table and to address it.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so that the jump table is populated during Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // The value does not fit in a sign-extended 32-bit immediate. Store it in two 32-bit pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}
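
// Note on the split store above: x86-64 stores take at most a sign-extended 32-bit immediate,
// so a constant outside that range has to be written as two 32-bit halves. The implicit null
// check is recorded right after the first store in either branch, since that is the access
// that faults if the base object is null.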

void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}
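
// Note (assuming, consistently with the label adjustment above, that the patched site is the
// 32-bit immediate of the instruction that loads the JIT GC root): info.label is bound just
// past that immediate, so stepping back by kLabelPositionToLiteralOffsetAdjustment gives the
// offset of the 4 bytes to overwrite with the address of the root's slot in roots_data.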

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}
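
// Note (informal description of the expected JIT flow): jit_string_patches_ and
// jit_class_patches_ are assumed to hold one PatchInfo per JIT-table-addressed string or
// class load emitted for this method; once the JIT has filled in roots_data, this hook
// rewrites every recorded site through PatchJitRootUse() above.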

bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86_64
}  // namespace art