/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "arch/x86/jni_frame_x86.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "optimizing/nodes.h"
#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
#include "utils/x86/managed_register_x86.h"

namespace art HIDDEN {

template<class MirrorType>
class GcRoot;

namespace x86 {

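// The ArtMethod* being executed is passed to managed code in EAX and, when needed, spilled at
// the bottom of the managed frame (offset 0 from ESP).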
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

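// Mask for the C2 condition flag (bit 10) of the x87 FPU status word.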
static constexpr int kC2ConditionMask = 0x400;

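// A register index one past the physical core registers (EAX..EDI are 0..7), used to represent
// the return address slot in the core spill mask; see the CodeGeneratorX86 constructor below.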
static constexpr int kFakeReturnRegister = Register(8);

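// Canonical quiet NaN bit patterns for double and float.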
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConvention calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()

class NullCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
};

class DivZeroCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
};

class DivRemMinusOneSlowPathX86 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
      : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (is_div_) {
      __ negl(reg_);
    } else {
      __ movl(reg_, Immediate(0));
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }

 private:
  Register reg_;
  bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
};

class BoundsCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, locations);
    }

    Location index_loc = locations->InAt(0);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));

    // Are we using an array length from memory?
    if (!length_loc.IsValid()) {
      DCHECK(instruction_->InputAt(1)->IsArrayLength());
      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
      DCHECK(array_length->IsEmittedAtUseSite());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      if (!index_loc.Equals(length_arg)) {
        // The index is not clobbered by loading the length directly to `length_arg`.
        __ movl(length_arg.AsRegister<Register>(),
                Address(array_loc.AsRegister<Register>(), len_offset));
        x86_codegen->Move32(index_arg, index_loc);
      } else if (!array_loc.Equals(index_arg)) {
        // The array reference is not clobbered by the index move.
        x86_codegen->Move32(index_arg, index_loc);
        __ movl(length_arg.AsRegister<Register>(),
                Address(array_loc.AsRegister<Register>(), len_offset));
      } else {
        // We do not have a temporary we could use, so swap the registers using the
        // parallel move resolver and replace the array with the length afterwards.
        codegen->EmitParallelMoves(
            index_loc,
            index_arg,
            DataType::Type::kInt32,
            array_loc,
            length_arg,
            DataType::Type::kReference);
        __ movl(length_arg.AsRegister<Register>(),
                Address(length_arg.AsRegister<Register>(), len_offset));
      }
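      // With string compression, the count field stores the length in its upper bits (the low
      // bit holds the compression state), so shift right by one to recover the actual length.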
      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
        __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
      }
    } else {
      // We're moving two locations to locations that could overlap,
      // so we need a parallel move resolver.
      codegen->EmitParallelMoves(
          index_loc,
          index_arg,
          DataType::Type::kInt32,
          length_loc,
          length_arg,
          DataType::Type::kInt32);
    }

    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_codegen->InvokeRuntime(entrypoint, instruction_, this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
};

class SuspendCheckSlowPathX86 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
};

class LoadStringSlowPathX86 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
};

class LoadClassSlowPathX86 : public SlowPathCode {
 public:
  LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
             x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_codegen->Move32(out, Location::RegisterLocation(EAX));
    }
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};

class TypeCheckSlowPathX86 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->EmitParallelMoves(locations->InAt(0),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                   DataType::Type::kReference,
                                   locations->InAt(1),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                   DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
      }
      RestoreLiveRegisters(codegen, locations);

      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};

class DeoptimizationSlowPathX86 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->Load32BitValue(
        calling_convention.GetRegisterAt(0),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};

class ArraySetSlowPathX86 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
                             Location ref,
                             bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
                                           Location ref,
                                           Register obj,
                                           const Address& field_addr,
                                           bool unpoison_ref_before_marking,
                                           Register temp)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp_(temp) {
  }

  const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    HInvoke* invoke = instruction_->AsInvoke();
    DCHECK(IsUnsafeCASReference(invoke) ||
           IsUnsafeGetAndSetReference(invoke) ||
           IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp_, ref_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp_, ref_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the mutator
    // updates it before us, but that is OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save EAX beforehand, and move the
    // expected value (stored in `temp_`) into EAX.
    __ pushl(EAX);
    __ movl(EAX, temp_);

    // Convenience aliases.
    Register base = obj_;
    Register expected = EAX;
    Register value = ref_reg;

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register. This way, poisoning
        // `value` won't invalidate `base`.
        value = temp_;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr_, value);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(value);
      }
      // No need to unpoison `expected` (EAX), as it will be overwritten below.
    }

    // Restore EAX.
    __ popl(EAX);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceAcquire))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
};

class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
 public:
  explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    if (instruction_->IsMethodExitHook()) {
      __ movl(EBX, Immediate(codegen->GetFrameSize()));
    }
    x86_codegen->InvokeRuntime(entry_point, instruction_, this);
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
};

class CompileOptimizedSlowPathX86 : public SlowPathCode {
 public:
  CompileOptimizedSlowPathX86(HSuspendCheck* suspend_check, uint32_t counter_address)
      : SlowPathCode(suspend_check),
        counter_address_(counter_address) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
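    // Re-arm the baseline hotness counter to its threshold before requesting optimized
    // compilation.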
    __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
    if (instruction_ != nullptr) {
      // Only saves full width XMM for SIMD.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->GenerateInvokeRuntime(
        GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
    if (instruction_ != nullptr) {
      // Only restores full width XMM for SIMD.
      RestoreLiveRegisters(codegen, instruction_->GetLocations());
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  uint32_t counter_address_;

  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition and FP condition to x86 name.
inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    // Signed to unsigned, and FP to x86 name.
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    // Unsigned remain unchanged.
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << XmmRegister(reg);
}

const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
}

size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
  return kX86WordSize;
}

size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
  return kX86WordSize;
}

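// When the graph uses SIMD, slow-path spill slots are wide enough for the full 128-bit XMM
// register (movups); otherwise only the 64-bit low lane is saved and restored (movsd).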
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
  }
  return GetSlowPathFPWidth();
}

size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
  }
  return GetSlowPathFPWidth();
}

void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                     HInstruction* instruction,
                                     SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, slow_path);
  }
}

void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ fs()->call(Address::Absolute(entry_point_offset));
}

namespace detail {

// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

#define TRUE_OVERRIDE(Name)                     \
  template <>                                   \
  struct IsUnimplemented<Intrinsics::k##Name> { \
    bool is_unimplemented = true;               \
  };
UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
    IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
    ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};

}  // namespace detail

CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    0,
                    compiler_options,
                    stats,
                    ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator(),
                 compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}

InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86Core(static_cast<int>(reg));
}

void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
  switch (ret->InputAt(0)->GetType()) {
    case DataType::Type::kReference:
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RegisterLocation(EAX));
      break;

    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
      break;

    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
      break;

    case DataType::Type::kVoid:
      locations->SetInAt(0, Location::NoLocation());
      break;

    default:
      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
  }
}

void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
  LocationSummary* locations = new (GetGraph()->GetAllocator())
      LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
  SetInForReturnValue(method_hook, locations);
  // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
  locations->AddTemp(Location::RegisterLocation(EAX));
  locations->AddTemp(Location::RegisterLocation(EDX));
  // An additional temporary register to hold the address at which the timestamp will be stored.
  locations->AddTemp(Location::RequiresRegister());
}

void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
  SlowPathCode* slow_path =
      new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
  codegen_->AddSlowPath(slow_path);
  LocationSummary* locations = instruction->GetLocations();

  if (instruction->IsMethodExitHook()) {
    // Check whether the caller needs to be checked for deoptimization. Strictly speaking, it
    // would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is faster
    // to just check for a non-zero value. The kCHA bit isn't used in debuggable runtimes, as CHA
    // optimization is disabled there. The other bit is used when this method itself requires a
    // deoptimization due to redefinition. So it is safe to just check for a non-zero value here.
    __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
  MemberOffset offset = instruction->IsMethodExitHook() ?
      instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
      instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
  __ cmpb(Address::Absolute(address + offset.Int32Value()),
          Immediate(instrumentation::Instrumentation::kFastTraceListeners));
  // If there are no method entry / exit listeners at all, skip the rest of the hook.
  __ j(kLess, slow_path->GetExitLabel());
  // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
  // If yes, just take the slow path.
  __ j(kGreater, slow_path->GetEntryLabel());

  // For curr_entry use the register that isn't EAX or EDX. We need this after
  // rdtsc which returns values in EAX + EDX.
  Register curr_entry = locations->GetTemp(2).AsRegister<Register>();
  Register init_entry = locations->GetTemp(1).AsRegister<Register>();

  // Check if there is space in the buffer for a new entry; if not, take the slow path.
  uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
  uint64_t trace_buffer_curr_entry_offset =
      Thread::TraceBufferCurrPtrOffset<kX86PointerSize>().Int32Value();

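  // A new entry is claimed by moving the current pointer down by one record; if it would cross
  // the start of the buffer, there is no space left and the slow path handles it.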
1274 __ fs()->movl(curr_entry, Address::Absolute(trace_buffer_curr_entry_offset));
1275 __ subl(curr_entry, Immediate(kNumEntriesForWallClock * sizeof(void*)));
1276 __ fs()->movl(init_entry, Address::Absolute(trace_buffer_ptr));
1277 __ cmpl(curr_entry, init_entry);
1278 __ j(kLess, slow_path->GetEntryLabel());
1279
1280 // Update the index in the `Thread`.
1281 __ fs()->movl(Address::Absolute(trace_buffer_curr_entry_offset), curr_entry);
1282
1283 // Record method pointer and trace action.
1284 Register method = init_entry;
1285 __ movl(method, Address(ESP, kCurrentMethodStackOffset));
1286 // Use last two bits to encode trace method action. For MethodEntry it is 0
1287 // so no need to set the bits since they are 0 already.
1288 if (instruction->IsMethodExitHook()) {
1289 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1290 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1291 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1292 __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1293 }
1294 __ movl(Address(curr_entry, kMethodOffsetInBytes), method);
1295 // Get the timestamp. rdtsc returns timestamp in EAX + EDX.
1296 __ rdtsc();
1297 __ movl(Address(curr_entry, kTimestampOffsetInBytes), EAX);
1298 __ movl(Address(curr_entry, kHighTimestampOffsetInBytes), EDX);
1299 __ Bind(slow_path->GetExitLabel());
1300 }
1301
VisitMethodExitHook(HMethodExitHook * instruction)1302 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1303 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1304 DCHECK(codegen_->RequiresCurrentMethod());
1305 GenerateMethodEntryExitHook(instruction);
1306 }
1307
VisitMethodEntryHook(HMethodEntryHook * method_hook)1308 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1309 LocationSummary* locations = new (GetGraph()->GetAllocator())
1310 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1311 // We use rdtsc to obtain a timestamp for tracing; rdtsc returns its result in EAX and EDX.
1312 locations->AddTemp(Location::RegisterLocation(EAX));
1313 locations->AddTemp(Location::RegisterLocation(EDX));
1314 // An additional temporary register to hold the buffer address at which the trace entry is stored.
1315 locations->AddTemp(Location::RequiresRegister());
1316 }
1317
VisitMethodEntryHook(HMethodEntryHook * instruction)1318 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1319 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1320 DCHECK(codegen_->RequiresCurrentMethod());
1321 GenerateMethodEntryExitHook(instruction);
1322 }
1323
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1324 void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1325 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1326 Register reg = EAX;
1327 if (is_frame_entry) {
1328 reg = kMethodRegisterArgument;
1329 } else {
1330 __ pushl(EAX);
1331 __ cfi().AdjustCFAOffset(4);
1332 __ movl(EAX, Address(ESP, kX86WordSize));
1333 }
1334 NearLabel overflow;
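// The hotness counter counts down; skip the decrement once it has already reached
// kNterpHotnessValue so that the 16-bit counter does not wrap around.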
1335 __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1336 Immediate(interpreter::kNterpHotnessValue));
1337 __ j(kEqual, &overflow);
1338 __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1339 __ Bind(&overflow);
1340 if (!is_frame_entry) {
1341 __ popl(EAX);
1342 __ cfi().AdjustCFAOffset(-4);
1343 }
1344 }
1345
1346 if (GetGraph()->IsCompilingBaseline() &&
1347 GetGraph()->IsUsefulOptimizing() &&
1348 !Runtime::Current()->IsAotCompiler()) {
1349 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1350 DCHECK(info != nullptr);
1351 uint32_t address = reinterpret_cast32<uint32_t>(info) +
1352 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1353 DCHECK(!HasEmptyFrame());
1354 SlowPathCode* slow_path =
1355 new (GetScopedAllocator()) CompileOptimizedSlowPathX86(suspend_check, address);
1356 AddSlowPath(slow_path);
1357 // With multiple threads, this can overflow. That is OK: we will eventually see it
1358 // reach 0. Also, at this point we have no register available to inspect
1359 // the counter directly.
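// Decrement the baseline hotness counter; when it reaches zero the addw sets ZF and we
// take the slow path that requests optimized compilation of this method.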
1360 __ addw(Address::Absolute(address), Immediate(-1));
1361 __ j(kEqual, slow_path->GetEntryLabel());
1362 __ Bind(slow_path->GetExitLabel());
1363 }
1364 }
1365
GenerateFrameEntry()1366 void CodeGeneratorX86::GenerateFrameEntry() {
1367 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1368
1369 // Check if we need to generate the clinit check. We will jump to the
1370 // resolution stub if the class is not initialized and the executing thread is
1371 // not the thread initializing it.
1372 // We do this before constructing the frame to get the correct stack trace if
1373 // an exception is thrown.
1374 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1375 NearLabel continue_execution, resolution;
1376 // We'll use EBP as temporary.
1377 __ pushl(EBP);
1378 __ cfi().AdjustCFAOffset(4);
1379 // Check if we're visibly initialized.
1380
1381 // We don't emit a read barrier here to save on code size. We rely on the
1382 // resolution trampoline to do a suspend check before re-entering this code.
1383 __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
1384 __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
1385 __ j(kAboveEqual, &continue_execution);
1386
1387 // Check if we're initializing and the thread initializing is the one
1388 // executing the code.
1389 __ cmpb(Address(EBP, kClassStatusByteOffset), Immediate(kShiftedInitializingValue));
1390 __ j(kBelow, &resolution);
1391
1392 __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1393 __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
1394 __ j(kEqual, &continue_execution);
1395 __ Bind(&resolution);
1396
1397 __ popl(EBP);
1398 __ cfi().AdjustCFAOffset(-4);
1399 // Jump to the resolution stub.
1400 ThreadOffset32 entrypoint_offset =
1401 GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
1402 __ fs()->jmp(Address::Absolute(entrypoint_offset));
1403
1404 __ Bind(&continue_execution);
1405 __ cfi().AdjustCFAOffset(4); // Undo the `-4` adjustment above. We get here with EBP pushed.
1406 __ popl(EBP);
1407 __ cfi().AdjustCFAOffset(-4);
1408 }
1409
1410 __ Bind(&frame_entry_label_);
1411 bool skip_overflow_check =
1412 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1413 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1414
1415 if (!skip_overflow_check) {
1416 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
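// Implicit stack overflow check: probe the address `reserved_bytes` below ESP. If the
// stack has overflowed, this load faults and the fault handler turns the fault into a
// StackOverflowError.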
1417 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1418 RecordPcInfoForFrameOrBlockEntry();
1419 }
1420
1421 if (!HasEmptyFrame()) {
1422 // Make sure the frame size isn't unreasonably large.
1423 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1424
1425 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1426 Register reg = kCoreCalleeSaves[i];
1427 if (allocated_registers_.ContainsCoreRegister(reg)) {
1428 __ pushl(reg);
1429 __ cfi().AdjustCFAOffset(kX86WordSize);
1430 __ cfi().RelOffset(DWARFReg(reg), 0);
1431 }
1432 }
1433
1434 int adjust = GetFrameSize() - FrameEntrySpillSize();
1435 IncreaseFrame(adjust);
1436 // Save the current method if we need it. Note that we do not
1437 // do this in HCurrentMethod, as the instruction might have been removed
1438 // in the SSA graph.
1439 if (RequiresCurrentMethod()) {
1440 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1441 }
1442
1443 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1444 // Initialize should_deoptimize flag to 0.
1445 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1446 }
1447 }
1448
1449 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1450 }
1451
GenerateFrameExit()1452 void CodeGeneratorX86::GenerateFrameExit() {
1453 __ cfi().RememberState();
1454 if (!HasEmptyFrame()) {
1455 int adjust = GetFrameSize() - FrameEntrySpillSize();
1456 DecreaseFrame(adjust);
1457
1458 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1459 Register reg = kCoreCalleeSaves[i];
1460 if (allocated_registers_.ContainsCoreRegister(reg)) {
1461 __ popl(reg);
1462 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1463 __ cfi().Restore(DWARFReg(reg));
1464 }
1465 }
1466 }
1467 __ ret();
1468 __ cfi().RestoreState();
1469 __ cfi().DefCFAOffset(GetFrameSize());
1470 }
1471
Bind(HBasicBlock * block)1472 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1473 __ Bind(GetLabelOf(block));
1474 }
1475
GetReturnLocation(DataType::Type type) const1476 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1477 switch (type) {
1478 case DataType::Type::kReference:
1479 case DataType::Type::kBool:
1480 case DataType::Type::kUint8:
1481 case DataType::Type::kInt8:
1482 case DataType::Type::kUint16:
1483 case DataType::Type::kInt16:
1484 case DataType::Type::kUint32:
1485 case DataType::Type::kInt32:
1486 return Location::RegisterLocation(EAX);
1487
1488 case DataType::Type::kUint64:
1489 case DataType::Type::kInt64:
1490 return Location::RegisterPairLocation(EAX, EDX);
1491
1492 case DataType::Type::kVoid:
1493 return Location::NoLocation();
1494
1495 case DataType::Type::kFloat64:
1496 case DataType::Type::kFloat32:
1497 return Location::FpuRegisterLocation(XMM0);
1498 }
1499 }
1500
GetMethodLocation() const1501 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1502 return Location::RegisterLocation(kMethodRegisterArgument);
1503 }
1504
GetNextLocation(DataType::Type type)1505 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
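// Note: `stack_index_` advances even when an argument is passed in a register so that later
// arguments that spill land at the correct stack offsets.
// As a rough illustration (the exact register set comes from the calling convention object),
// a signature (int, long, float) maps to: a core register for the int, a register pair or a
// double stack slot for the long (depending on how many core registers remain), and an XMM
// register or a stack slot for the float.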
1506 switch (type) {
1507 case DataType::Type::kReference:
1508 case DataType::Type::kBool:
1509 case DataType::Type::kUint8:
1510 case DataType::Type::kInt8:
1511 case DataType::Type::kUint16:
1512 case DataType::Type::kInt16:
1513 case DataType::Type::kInt32: {
1514 uint32_t index = gp_index_++;
1515 stack_index_++;
1516 if (index < calling_convention.GetNumberOfRegisters()) {
1517 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1518 } else {
1519 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1520 }
1521 }
1522
1523 case DataType::Type::kInt64: {
1524 uint32_t index = gp_index_;
1525 gp_index_ += 2;
1526 stack_index_ += 2;
1527 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1528 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1529 calling_convention.GetRegisterPairAt(index));
1530 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1531 } else {
1532 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1533 }
1534 }
1535
1536 case DataType::Type::kFloat32: {
1537 uint32_t index = float_index_++;
1538 stack_index_++;
1539 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1540 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1541 } else {
1542 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1543 }
1544 }
1545
1546 case DataType::Type::kFloat64: {
1547 uint32_t index = float_index_++;
1548 stack_index_ += 2;
1549 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1550 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1551 } else {
1552 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1553 }
1554 }
1555
1556 case DataType::Type::kUint32:
1557 case DataType::Type::kUint64:
1558 case DataType::Type::kVoid:
1559 LOG(FATAL) << "Unexpected parameter type " << type;
1560 UNREACHABLE();
1561 }
1562 return Location::NoLocation();
1563 }
1564
GetNextLocation(DataType::Type type)1565 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1566 DCHECK_NE(type, DataType::Type::kReference);
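// On x86, the native calling convention used for @CriticalNative methods passes all
// arguments on the stack, so only stack slot offsets are computed here. When used for
// register allocation, the location is relaxed to Any(); see `for_register_allocation_` below.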
1567
1568 Location location;
1569 if (DataType::Is64BitType(type)) {
1570 location = Location::DoubleStackSlot(stack_offset_);
1571 stack_offset_ += 2 * kFramePointerSize;
1572 } else {
1573 location = Location::StackSlot(stack_offset_);
1574 stack_offset_ += kFramePointerSize;
1575 }
1576 if (for_register_allocation_) {
1577 location = Location::Any();
1578 }
1579 return location;
1580 }
1581
GetReturnLocation(DataType::Type type) const1582 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1583 // We perform conversion to the managed ABI return register after the call if needed.
1584 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1585 return dex_calling_convention.GetReturnLocation(type);
1586 }
1587
GetMethodLocation() const1588 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1589 // Pass the method in the hidden argument EAX.
1590 return Location::RegisterLocation(EAX);
1591 }
1592
Move32(Location destination,Location source)1593 void CodeGeneratorX86::Move32(Location destination, Location source) {
1594 if (source.Equals(destination)) {
1595 return;
1596 }
1597 if (destination.IsRegister()) {
1598 if (source.IsRegister()) {
1599 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1600 } else if (source.IsFpuRegister()) {
1601 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1602 } else if (source.IsConstant()) {
1603 int32_t value = GetInt32ValueOf(source.GetConstant());
1604 __ movl(destination.AsRegister<Register>(), Immediate(value));
1605 } else {
1606 DCHECK(source.IsStackSlot());
1607 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1608 }
1609 } else if (destination.IsFpuRegister()) {
1610 if (source.IsRegister()) {
1611 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1612 } else if (source.IsFpuRegister()) {
1613 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1614 } else {
1615 DCHECK(source.IsStackSlot());
1616 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1617 }
1618 } else {
1619 DCHECK(destination.IsStackSlot()) << destination;
1620 if (source.IsRegister()) {
1621 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1622 } else if (source.IsFpuRegister()) {
1623 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1624 } else if (source.IsConstant()) {
1625 HConstant* constant = source.GetConstant();
1626 int32_t value = GetInt32ValueOf(constant);
1627 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1628 } else {
1629 DCHECK(source.IsStackSlot());
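// Stack-slot to stack-slot move: use push/pop so no scratch register is needed.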
1630 __ pushl(Address(ESP, source.GetStackIndex()));
1631 __ popl(Address(ESP, destination.GetStackIndex()));
1632 }
1633 }
1634 }
1635
Move64(Location destination,Location source)1636 void CodeGeneratorX86::Move64(Location destination, Location source) {
1637 if (source.Equals(destination)) {
1638 return;
1639 }
1640 if (destination.IsRegisterPair()) {
1641 if (source.IsRegisterPair()) {
1642 EmitParallelMoves(
1643 Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1644 Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1645 DataType::Type::kInt32,
1646 Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1647 Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1648 DataType::Type::kInt32);
1649 } else if (source.IsFpuRegister()) {
1650 XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
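// Split the 64-bit XMM value into the register pair: movd copies the low 32 bits, then the
// high half is shifted down and copied. Note that this clobbers `src_reg`.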
1651 __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1652 __ psrlq(src_reg, Immediate(32));
1653 __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1654 } else {
1655 // No conflict possible, so just do the moves.
1656 DCHECK(source.IsDoubleStackSlot());
1657 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1658 __ movl(destination.AsRegisterPairHigh<Register>(),
1659 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1660 }
1661 } else if (destination.IsFpuRegister()) {
1662 if (source.IsFpuRegister()) {
1663 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1664 } else if (source.IsDoubleStackSlot()) {
1665 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1666 } else if (source.IsRegisterPair()) {
1667 size_t elem_size = DataType::Size(DataType::Type::kInt32);
1668 // Push the 2 source registers to the stack.
1669 __ pushl(source.AsRegisterPairHigh<Register>());
1670 __ cfi().AdjustCFAOffset(elem_size);
1671 __ pushl(source.AsRegisterPairLow<Register>());
1672 __ cfi().AdjustCFAOffset(elem_size);
1673 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1674 // And remove the temporary stack space we allocated.
1675 DecreaseFrame(2 * elem_size);
1676 } else {
1677 LOG(FATAL) << "Unimplemented";
1678 }
1679 } else {
1680 DCHECK(destination.IsDoubleStackSlot()) << destination;
1681 if (source.IsRegisterPair()) {
1682 // No conflict possible, so just do the moves.
1683 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1684 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1685 source.AsRegisterPairHigh<Register>());
1686 } else if (source.IsFpuRegister()) {
1687 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1688 } else if (source.IsConstant()) {
1689 HConstant* constant = source.GetConstant();
1690 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1691 int64_t value = GetInt64ValueOf(constant);
1692 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1693 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1694 Immediate(High32Bits(value)));
1695 } else {
1696 DCHECK(source.IsDoubleStackSlot()) << source;
1697 EmitParallelMoves(
1698 Location::StackSlot(source.GetStackIndex()),
1699 Location::StackSlot(destination.GetStackIndex()),
1700 DataType::Type::kInt32,
1701 Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1702 Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1703 DataType::Type::kInt32);
1704 }
1705 }
1706 }
1707
CreateAddress(Register base,Register index=Register::kNoRegister,ScaleFactor scale=TIMES_1,int32_t disp=0)1708 static Address CreateAddress(Register base,
1709 Register index = Register::kNoRegister,
1710 ScaleFactor scale = TIMES_1,
1711 int32_t disp = 0) {
1712 if (index == Register::kNoRegister) {
1713 return Address(base, disp);
1714 }
1715
1716 return Address(base, index, scale, disp);
1717 }
1718
LoadFromMemoryNoBarrier(DataType::Type dst_type,Location dst,Address src,HInstruction * instr,XmmRegister temp,bool is_atomic_load)1719 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1720 Location dst,
1721 Address src,
1722 HInstruction* instr,
1723 XmmRegister temp,
1724 bool is_atomic_load) {
1725 switch (dst_type) {
1726 case DataType::Type::kBool:
1727 case DataType::Type::kUint8:
1728 __ movzxb(dst.AsRegister<Register>(), src);
1729 break;
1730 case DataType::Type::kInt8:
1731 __ movsxb(dst.AsRegister<Register>(), src);
1732 break;
1733 case DataType::Type::kInt16:
1734 __ movsxw(dst.AsRegister<Register>(), src);
1735 break;
1736 case DataType::Type::kUint16:
1737 __ movzxw(dst.AsRegister<Register>(), src);
1738 break;
1739 case DataType::Type::kInt32:
1740 __ movl(dst.AsRegister<Register>(), src);
1741 break;
1742 case DataType::Type::kInt64: {
1743 if (is_atomic_load) {
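// Load the 64-bit value with a single movsd so the read is one memory access, then split
// it into the register pair via movd/psrlq.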
1744 __ movsd(temp, src);
1745 if (instr != nullptr) {
1746 MaybeRecordImplicitNullCheck(instr);
1747 }
1748 __ movd(dst.AsRegisterPairLow<Register>(), temp);
1749 __ psrlq(temp, Immediate(32));
1750 __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1751 } else {
1752 DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1753 Address src_high = Address::displace(src, kX86WordSize);
1754 __ movl(dst.AsRegisterPairLow<Register>(), src);
1755 if (instr != nullptr) {
1756 MaybeRecordImplicitNullCheck(instr);
1757 }
1758 __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1759 }
1760 break;
1761 }
1762 case DataType::Type::kFloat32:
1763 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1764 break;
1765 case DataType::Type::kFloat64:
1766 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1767 break;
1768 case DataType::Type::kReference:
1769 DCHECK(!EmitReadBarrier());
1770 __ movl(dst.AsRegister<Register>(), src);
1771 __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1772 break;
1773 default:
1774 LOG(FATAL) << "Unreachable type " << dst_type;
1775 }
1776 if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1777 // kInt64 needs special handling that is done in the above switch.
1778 MaybeRecordImplicitNullCheck(instr);
1779 }
1780 }
1781
MoveToMemory(DataType::Type src_type,Location src,Register dst_base,Register dst_index,ScaleFactor dst_scale,int32_t dst_disp)1782 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1783 Location src,
1784 Register dst_base,
1785 Register dst_index,
1786 ScaleFactor dst_scale,
1787 int32_t dst_disp) {
1788 DCHECK(dst_base != Register::kNoRegister);
1789 Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1790
1791 switch (src_type) {
1792 case DataType::Type::kBool:
1793 case DataType::Type::kUint8:
1794 case DataType::Type::kInt8: {
1795 if (src.IsConstant()) {
1796 __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1797 } else {
1798 __ movb(dst, src.AsRegister<ByteRegister>());
1799 }
1800 break;
1801 }
1802 case DataType::Type::kUint16:
1803 case DataType::Type::kInt16: {
1804 if (src.IsConstant()) {
1805 __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1806 } else {
1807 __ movw(dst, src.AsRegister<Register>());
1808 }
1809 break;
1810 }
1811 case DataType::Type::kUint32:
1812 case DataType::Type::kInt32: {
1813 if (src.IsConstant()) {
1814 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1815 __ movl(dst, Immediate(v));
1816 } else {
1817 __ movl(dst, src.AsRegister<Register>());
1818 }
1819 break;
1820 }
1821 case DataType::Type::kUint64:
1822 case DataType::Type::kInt64: {
1823 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1824 if (src.IsConstant()) {
1825 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1826 __ movl(dst, Immediate(Low32Bits(v)));
1827 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1828 } else {
1829 __ movl(dst, src.AsRegisterPairLow<Register>());
1830 __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1831 }
1832 break;
1833 }
1834 case DataType::Type::kFloat32: {
1835 if (src.IsConstant()) {
1836 int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1837 __ movl(dst, Immediate(v));
1838 } else {
1839 __ movss(dst, src.AsFpuRegister<XmmRegister>());
1840 }
1841 break;
1842 }
1843 case DataType::Type::kFloat64: {
1844 Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1845 if (src.IsConstant()) {
1846 int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1847 __ movl(dst, Immediate(Low32Bits(v)));
1848 __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1849 } else {
1850 __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1851 }
1852 break;
1853 }
1854 case DataType::Type::kVoid:
1855 case DataType::Type::kReference:
1856 LOG(FATAL) << "Unreachable type " << src_type;
1857 }
1858 }
1859
MoveConstant(Location location,int32_t value)1860 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1861 DCHECK(location.IsRegister());
1862 __ movl(location.AsRegister<Register>(), Immediate(value));
1863 }
1864
MoveLocation(Location dst,Location src,DataType::Type dst_type)1865 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1866 HParallelMove move(GetGraph()->GetAllocator());
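// Split 64-bit moves (except constants and FP sources) into two 32-bit moves so the move
// resolver can handle the halves of a register pair independently.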
1867 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1868 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1869 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1870 } else {
1871 move.AddMove(src, dst, dst_type, nullptr);
1872 }
1873 GetMoveResolver()->EmitNativeCode(&move);
1874 }
1875
AddLocationAsTemp(Location location,LocationSummary * locations)1876 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1877 if (location.IsRegister()) {
1878 locations->AddTemp(location);
1879 } else if (location.IsRegisterPair()) {
1880 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1881 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1882 } else {
1883 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1884 }
1885 }
1886
HandleGoto(HInstruction * got,HBasicBlock * successor)1887 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1888 if (successor->IsExitBlock()) {
1889 DCHECK(got->GetPrevious()->AlwaysThrows());
1890 return; // no code needed
1891 }
1892
1893 HBasicBlock* block = got->GetBlock();
1894 HInstruction* previous = got->GetPrevious();
1895
1896 HLoopInformation* info = block->GetLoopInformation();
1897 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1898 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
1899 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1900 return;
1901 }
1902
1903 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1904 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1905 }
1906 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1907 __ jmp(codegen_->GetLabelOf(successor));
1908 }
1909 }
1910
VisitGoto(HGoto * got)1911 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1912 got->SetLocations(nullptr);
1913 }
1914
VisitGoto(HGoto * got)1915 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1916 HandleGoto(got, got->GetSuccessor());
1917 }
1918
VisitTryBoundary(HTryBoundary * try_boundary)1919 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1920 try_boundary->SetLocations(nullptr);
1921 }
1922
VisitTryBoundary(HTryBoundary * try_boundary)1923 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1924 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1925 if (!successor->IsExitBlock()) {
1926 HandleGoto(try_boundary, successor);
1927 }
1928 }
1929
VisitExit(HExit * exit)1930 void LocationsBuilderX86::VisitExit(HExit* exit) {
1931 exit->SetLocations(nullptr);
1932 }
1933
VisitExit(HExit * exit)1934 void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
1935
1936 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1937 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1938 LabelType* true_label,
1939 LabelType* false_label) {
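// ucomiss/ucomisd report an unordered result (NaN operand) via the parity flag, so handle
// the NaN case explicitly before testing the actual condition.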
1940 if (cond->IsFPConditionTrueIfNaN()) {
1941 __ j(kUnordered, true_label);
1942 } else if (cond->IsFPConditionFalseIfNaN()) {
1943 __ j(kUnordered, false_label);
1944 }
1945 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1946 }
1947
1948 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1949 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1950 LabelType* true_label,
1951 LabelType* false_label) {
1952 LocationSummary* locations = cond->GetLocations();
1953 Location left = locations->InAt(0);
1954 Location right = locations->InAt(1);
1955 IfCondition if_cond = cond->GetCondition();
1956
1957 Register left_high = left.AsRegisterPairHigh<Register>();
1958 Register left_low = left.AsRegisterPairLow<Register>();
1959 IfCondition true_high_cond = if_cond;
1960 IfCondition false_high_cond = cond->GetOppositeCondition();
1961 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1962
1963 // Set the conditions for the test, remembering that == needs to be
1964 // decided using the low words.
1965 switch (if_cond) {
1966 case kCondEQ:
1967 case kCondNE:
1968 // Nothing to do.
1969 break;
1970 case kCondLT:
1971 false_high_cond = kCondGT;
1972 break;
1973 case kCondLE:
1974 true_high_cond = kCondLT;
1975 break;
1976 case kCondGT:
1977 false_high_cond = kCondLT;
1978 break;
1979 case kCondGE:
1980 true_high_cond = kCondGT;
1981 break;
1982 case kCondB:
1983 false_high_cond = kCondA;
1984 break;
1985 case kCondBE:
1986 true_high_cond = kCondB;
1987 break;
1988 case kCondA:
1989 false_high_cond = kCondB;
1990 break;
1991 case kCondAE:
1992 true_high_cond = kCondA;
1993 break;
1994 }
1995
1996 if (right.IsConstant()) {
1997 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1998 int32_t val_high = High32Bits(value);
1999 int32_t val_low = Low32Bits(value);
2000
2001 codegen_->Compare32BitValue(left_high, val_high);
2002 if (if_cond == kCondNE) {
2003 __ j(X86Condition(true_high_cond), true_label);
2004 } else if (if_cond == kCondEQ) {
2005 __ j(X86Condition(false_high_cond), false_label);
2006 } else {
2007 __ j(X86Condition(true_high_cond), true_label);
2008 __ j(X86Condition(false_high_cond), false_label);
2009 }
2010 // Must be equal high, so compare the lows.
2011 codegen_->Compare32BitValue(left_low, val_low);
2012 } else if (right.IsRegisterPair()) {
2013 Register right_high = right.AsRegisterPairHigh<Register>();
2014 Register right_low = right.AsRegisterPairLow<Register>();
2015
2016 __ cmpl(left_high, right_high);
2017 if (if_cond == kCondNE) {
2018 __ j(X86Condition(true_high_cond), true_label);
2019 } else if (if_cond == kCondEQ) {
2020 __ j(X86Condition(false_high_cond), false_label);
2021 } else {
2022 __ j(X86Condition(true_high_cond), true_label);
2023 __ j(X86Condition(false_high_cond), false_label);
2024 }
2025 // Must be equal high, so compare the lows.
2026 __ cmpl(left_low, right_low);
2027 } else {
2028 DCHECK(right.IsDoubleStackSlot());
2029 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
2030 if (if_cond == kCondNE) {
2031 __ j(X86Condition(true_high_cond), true_label);
2032 } else if (if_cond == kCondEQ) {
2033 __ j(X86Condition(false_high_cond), false_label);
2034 } else {
2035 __ j(X86Condition(true_high_cond), true_label);
2036 __ j(X86Condition(false_high_cond), false_label);
2037 }
2038 // Must be equal high, so compare the lows.
2039 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
2040 }
2041 // The last comparison might be unsigned.
2042 __ j(final_condition, true_label);
2043 }
2044
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)2045 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
2046 Location rhs,
2047 HInstruction* insn,
2048 bool is_double) {
2049 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
2050 if (is_double) {
2051 if (rhs.IsFpuRegister()) {
2052 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2053 } else if (const_area != nullptr) {
2054 DCHECK(const_area->IsEmittedAtUseSite());
2055 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
2056 codegen_->LiteralDoubleAddress(
2057 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
2058 const_area->GetBaseMethodAddress(),
2059 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2060 } else {
2061 DCHECK(rhs.IsDoubleStackSlot());
2062 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2063 }
2064 } else {
2065 if (rhs.IsFpuRegister()) {
2066 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2067 } else if (const_area != nullptr) {
2068 DCHECK(const_area->IsEmittedAtUseSite());
2069 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2070 codegen_->LiteralFloatAddress(
2071 const_area->GetConstant()->AsFloatConstant()->GetValue(),
2072 const_area->GetBaseMethodAddress(),
2073 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2074 } else {
2075 DCHECK(rhs.IsStackSlot());
2076 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2077 }
2078 }
2079 }
2080
2081 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)2082 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2083 LabelType* true_target_in,
2084 LabelType* false_target_in) {
2085 // Generated branching requires both targets to be explicit. If either of the
2086 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2087 LabelType fallthrough_target;
2088 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2089 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2090
2091 LocationSummary* locations = condition->GetLocations();
2092 Location left = locations->InAt(0);
2093 Location right = locations->InAt(1);
2094
2095 DataType::Type type = condition->InputAt(0)->GetType();
2096 switch (type) {
2097 case DataType::Type::kInt64:
2098 GenerateLongComparesAndJumps(condition, true_target, false_target);
2099 break;
2100 case DataType::Type::kFloat32:
2101 GenerateFPCompare(left, right, condition, false);
2102 GenerateFPJumps(condition, true_target, false_target);
2103 break;
2104 case DataType::Type::kFloat64:
2105 GenerateFPCompare(left, right, condition, true);
2106 GenerateFPJumps(condition, true_target, false_target);
2107 break;
2108 default:
2109 LOG(FATAL) << "Unexpected compare type " << type;
2110 }
2111
2112 if (false_target != &fallthrough_target) {
2113 __ jmp(false_target);
2114 }
2115
2116 if (fallthrough_target.IsLinked()) {
2117 __ Bind(&fallthrough_target);
2118 }
2119 }
2120
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch,const CompilerOptions & compiler_options)2121 static bool AreEflagsSetFrom(HInstruction* cond,
2122 HInstruction* branch,
2123 const CompilerOptions& compiler_options) {
2124 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2125 // are set only strictly before `branch`. We can't use the eflags on long/FP
2126 // conditions if they are materialized due to the complex branching.
2127 return cond->IsCondition() &&
2128 cond->GetNext() == branch &&
2129 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2130 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2131 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2132 compiler_options.ProfileBranches());
2133 }
2134
2135 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)2136 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2137 size_t condition_input_index,
2138 LabelType* true_target,
2139 LabelType* false_target) {
2140 HInstruction* cond = instruction->InputAt(condition_input_index);
2141
2142 if (true_target == nullptr && false_target == nullptr) {
2143 // Nothing to do. The code always falls through.
2144 return;
2145 } else if (cond->IsIntConstant()) {
2146 // Constant condition, statically compared against "true" (integer value 1).
2147 if (cond->AsIntConstant()->IsTrue()) {
2148 if (true_target != nullptr) {
2149 __ jmp(true_target);
2150 }
2151 } else {
2152 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2153 if (false_target != nullptr) {
2154 __ jmp(false_target);
2155 }
2156 }
2157 return;
2158 }
2159
2160 // The following code generates these patterns:
2161 // (1) true_target == nullptr && false_target != nullptr
2162 // - opposite condition true => branch to false_target
2163 // (2) true_target != nullptr && false_target == nullptr
2164 // - condition true => branch to true_target
2165 // (3) true_target != nullptr && false_target != nullptr
2166 // - condition true => branch to true_target
2167 // - branch to false_target
2168 if (IsBooleanValueOrMaterializedCondition(cond)) {
2169 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2170 if (true_target == nullptr) {
2171 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2172 } else {
2173 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2174 }
2175 } else {
2176 // Materialized condition, compare against 0.
2177 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2178 if (lhs.IsRegister()) {
2179 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2180 } else {
2181 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2182 }
2183 if (true_target == nullptr) {
2184 __ j(kEqual, false_target);
2185 } else {
2186 __ j(kNotEqual, true_target);
2187 }
2188 }
2189 } else {
2190 // Condition has not been materialized, use its inputs as the comparison and
2191 // its condition as the branch condition.
2192 HCondition* condition = cond->AsCondition();
2193
2194 // If this is a long or FP comparison that has been folded into
2195 // the HCondition, generate the comparison directly.
2196 DataType::Type type = condition->InputAt(0)->GetType();
2197 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2198 GenerateCompareTestAndBranch(condition, true_target, false_target);
2199 return;
2200 }
2201
2202 Location lhs = condition->GetLocations()->InAt(0);
2203 Location rhs = condition->GetLocations()->InAt(1);
2204 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2205 codegen_->GenerateIntCompare(lhs, rhs);
2206 if (true_target == nullptr) {
2207 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2208 } else {
2209 __ j(X86Condition(condition->GetCondition()), true_target);
2210 }
2211 }
2212
2213 // If neither branch falls through (case 3), the conditional branch to `true_target`
2214 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2215 if (true_target != nullptr && false_target != nullptr) {
2216 __ jmp(false_target);
2217 }
2218 }
2219
VisitIf(HIf * if_instr)2220 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2221 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2222 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2223 if (GetGraph()->IsCompilingBaseline() &&
2224 codegen_->GetCompilerOptions().ProfileBranches() &&
2225 !Runtime::Current()->IsAotCompiler()) {
2226 locations->SetInAt(0, Location::RequiresRegister());
2227 locations->AddRegisterTemps(2);
2228 } else {
2229 locations->SetInAt(0, Location::Any());
2230 }
2231 }
2232 }
2233
VisitIf(HIf * if_instr)2234 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2235 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2236 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2237 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2238 nullptr : codegen_->GetLabelOf(true_successor);
2239 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2240 nullptr : codegen_->GetLabelOf(false_successor);
2241 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2242 if (GetGraph()->IsCompilingBaseline() &&
2243 codegen_->GetCompilerOptions().ProfileBranches() &&
2244 !Runtime::Current()->IsAotCompiler()) {
2245 DCHECK(if_instr->InputAt(0)->IsCondition());
2246 Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
2247 Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
2248 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2249 DCHECK(info != nullptr);
2250 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2251 // Currently, not all If branches are profiled.
2252 if (cache != nullptr) {
2253 uint64_t address =
2254 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2255 static_assert(
2256 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2257 "Unexpected offsets for BranchCache");
2258 NearLabel done;
2259 Location lhs = if_instr->GetLocations()->InAt(0);
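// The condition value (0 or 1) in `lhs` selects the false or true counter; the two 16-bit
// counters are adjacent, hence the TIMES_2 scaling. The counter saturates: if the increment
// wraps it to zero, skip the store so it stays at the maximum value.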
2260 __ movl(temp, Immediate(address));
2261 __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
2262 __ addw(counter, Immediate(1));
2263 __ j(kEqual, &done);
2264 __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
2265 __ Bind(&done);
2266 }
2267 }
2268 }
2269 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2270 }
2271
VisitDeoptimize(HDeoptimize * deoptimize)2272 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2273 LocationSummary* locations = new (GetGraph()->GetAllocator())
2274 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2275 InvokeRuntimeCallingConvention calling_convention;
2276 RegisterSet caller_saves = RegisterSet::Empty();
2277 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2278 locations->SetCustomSlowPathCallerSaves(caller_saves);
2279 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2280 locations->SetInAt(0, Location::Any());
2281 }
2282 }
2283
VisitDeoptimize(HDeoptimize * deoptimize)2284 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2285 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2286 GenerateTestAndBranch<Label>(deoptimize,
2287 /* condition_input_index= */ 0,
2288 slow_path->GetEntryLabel(),
2289 /* false_target= */ nullptr);
2290 }
2291
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2292 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2293 LocationSummary* locations = new (GetGraph()->GetAllocator())
2294 LocationSummary(flag, LocationSummary::kNoCall);
2295 locations->SetOut(Location::RequiresRegister());
2296 }
2297
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2298 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2299 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2300 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2301 }
2302
SelectCanUseCMOV(HSelect * select)2303 static bool SelectCanUseCMOV(HSelect* select) {
2304 // There are no conditional move instructions for XMMs.
2305 if (DataType::IsFloatingPointType(select->GetType())) {
2306 return false;
2307 }
2308
2309 // A FP condition doesn't generate the single CC that we need.
2310 // In 32 bit mode, a long condition doesn't generate a single CC either.
2311 HInstruction* condition = select->GetCondition();
2312 if (condition->IsCondition()) {
2313 DataType::Type compare_type = condition->InputAt(0)->GetType();
2314 if (compare_type == DataType::Type::kInt64 ||
2315 DataType::IsFloatingPointType(compare_type)) {
2316 return false;
2317 }
2318 }
2319
2320 // We can generate a CMOV for this Select.
2321 return true;
2322 }
2323
VisitSelect(HSelect * select)2324 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2325 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2326 if (DataType::IsFloatingPointType(select->GetType())) {
2327 locations->SetInAt(0, Location::RequiresFpuRegister());
2328 locations->SetInAt(1, Location::Any());
2329 } else {
2330 locations->SetInAt(0, Location::RequiresRegister());
2331 if (SelectCanUseCMOV(select)) {
2332 if (select->InputAt(1)->IsConstant()) {
2333 // Cmov can't handle a constant value.
2334 locations->SetInAt(1, Location::RequiresRegister());
2335 } else {
2336 locations->SetInAt(1, Location::Any());
2337 }
2338 } else {
2339 locations->SetInAt(1, Location::Any());
2340 }
2341 }
2342 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2343 locations->SetInAt(2, Location::RequiresRegister());
2344 }
2345 locations->SetOut(Location::SameAsFirstInput());
2346 }
2347
VisitSelect(HSelect * select)2348 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2349 LocationSummary* locations = select->GetLocations();
2350 DCHECK(locations->InAt(0).Equals(locations->Out()));
2351 if (SelectCanUseCMOV(select)) {
2352 // If both the condition and the source types are integer, we can generate
2353 // a CMOV to implement Select.
2354
2355 HInstruction* select_condition = select->GetCondition();
2356 Condition cond = kNotEqual;
2357
2358 // Figure out how to test the 'condition'.
2359 if (select_condition->IsCondition()) {
2360 HCondition* condition = select_condition->AsCondition();
2361 if (!condition->IsEmittedAtUseSite()) {
2362 // This was a previously materialized condition.
2363 // Can we use the existing condition code?
2364 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2365 // Materialization was the previous instruction. Condition codes are right.
2366 cond = X86Condition(condition->GetCondition());
2367 } else {
2368 // No, we have to recreate the condition code.
2369 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2370 __ testl(cond_reg, cond_reg);
2371 }
2372 } else {
2373 // We can't handle FP or long here.
2374 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2375 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2376 LocationSummary* cond_locations = condition->GetLocations();
2377 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2378 cond = X86Condition(condition->GetCondition());
2379 }
2380 } else {
2381 // Must be a Boolean condition, which needs to be compared to 0.
2382 Register cond_reg = locations->InAt(2).AsRegister<Register>();
2383 __ testl(cond_reg, cond_reg);
2384 }
2385
2386 // If the condition is true, overwrite the output, which already contains false.
2387 Location false_loc = locations->InAt(0);
2388 Location true_loc = locations->InAt(1);
2389 if (select->GetType() == DataType::Type::kInt64) {
2390 // 64 bit conditional move.
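// x86-32 has no 64-bit cmov, so emit one 32-bit cmov per half of the register pair; both
// use the same condition code.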
2391 Register false_high = false_loc.AsRegisterPairHigh<Register>();
2392 Register false_low = false_loc.AsRegisterPairLow<Register>();
2393 if (true_loc.IsRegisterPair()) {
2394 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2395 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2396 } else {
2397 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2398 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2399 }
2400 } else {
2401 // 32 bit conditional move.
2402 Register false_reg = false_loc.AsRegister<Register>();
2403 if (true_loc.IsRegister()) {
2404 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2405 } else {
2406 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2407 }
2408 }
2409 } else {
2410 NearLabel false_target;
2411 GenerateTestAndBranch<NearLabel>(
2412 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2413 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2414 __ Bind(&false_target);
2415 }
2416 }
2417
VisitNop(HNop * nop)2418 void LocationsBuilderX86::VisitNop(HNop* nop) {
2419 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2420 }
2421
VisitNop(HNop *)2422 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2423 // The environment recording already happened in CodeGenerator::Compile.
2424 }
2425
IncreaseFrame(size_t adjustment)2426 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2427 __ subl(ESP, Immediate(adjustment));
2428 __ cfi().AdjustCFAOffset(adjustment);
2429 }
2430
DecreaseFrame(size_t adjustment)2431 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2432 __ addl(ESP, Immediate(adjustment));
2433 __ cfi().AdjustCFAOffset(-adjustment);
2434 }
2435
GenerateNop()2436 void CodeGeneratorX86::GenerateNop() {
2437 __ nop();
2438 }
2439
HandleCondition(HCondition * cond)2440 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2441 LocationSummary* locations =
2442 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2443 // Handle the long/FP comparisons made in instruction simplification.
2444 switch (cond->InputAt(0)->GetType()) {
2445 case DataType::Type::kInt64: {
2446 locations->SetInAt(0, Location::RequiresRegister());
2447 locations->SetInAt(1, Location::Any());
2448 if (!cond->IsEmittedAtUseSite()) {
2449 locations->SetOut(Location::RequiresRegister());
2450 }
2451 break;
2452 }
2453 case DataType::Type::kFloat32:
2454 case DataType::Type::kFloat64: {
2455 locations->SetInAt(0, Location::RequiresFpuRegister());
2456 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2457 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2458 } else if (cond->InputAt(1)->IsConstant()) {
2459 locations->SetInAt(1, Location::RequiresFpuRegister());
2460 } else {
2461 locations->SetInAt(1, Location::Any());
2462 }
2463 if (!cond->IsEmittedAtUseSite()) {
2464 locations->SetOut(Location::RequiresRegister());
2465 }
2466 break;
2467 }
2468 default:
2469 locations->SetInAt(0, Location::RequiresRegister());
2470 locations->SetInAt(1, Location::Any());
2471 if (!cond->IsEmittedAtUseSite()) {
2472 // We need a byte register.
2473 locations->SetOut(Location::RegisterLocation(ECX));
2474 }
2475 break;
2476 }
2477 }
2478
HandleCondition(HCondition * cond)2479 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2480 if (cond->IsEmittedAtUseSite()) {
2481 return;
2482 }
2483
2484 LocationSummary* locations = cond->GetLocations();
2485 Location lhs = locations->InAt(0);
2486 Location rhs = locations->InAt(1);
2487 Register reg = locations->Out().AsRegister<Register>();
2488 NearLabel true_label, false_label;
2489
2490 switch (cond->InputAt(0)->GetType()) {
2491 default: {
2492 // Integer case.
2493
2494 // Clear output register: setb only sets the low byte.
2495 __ xorl(reg, reg);
2496 codegen_->GenerateIntCompare(lhs, rhs);
2497 __ setb(X86Condition(cond->GetCondition()), reg);
2498 return;
2499 }
2500 case DataType::Type::kInt64:
2501 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2502 break;
2503 case DataType::Type::kFloat32:
2504 GenerateFPCompare(lhs, rhs, cond, false);
2505 GenerateFPJumps(cond, &true_label, &false_label);
2506 break;
2507 case DataType::Type::kFloat64:
2508 GenerateFPCompare(lhs, rhs, cond, true);
2509 GenerateFPJumps(cond, &true_label, &false_label);
2510 break;
2511 }
2512
2513 // Convert the jumps into the result.
2514 NearLabel done_label;
2515
2516 // False case: result = 0.
2517 __ Bind(&false_label);
2518 __ xorl(reg, reg);
2519 __ jmp(&done_label);
2520
2521 // True case: result = 1.
2522 __ Bind(&true_label);
2523 __ movl(reg, Immediate(1));
2524 __ Bind(&done_label);
2525 }
2526
VisitEqual(HEqual * comp)2527 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2528 HandleCondition(comp);
2529 }
2530
VisitEqual(HEqual * comp)2531 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2532 HandleCondition(comp);
2533 }
2534
VisitNotEqual(HNotEqual * comp)2535 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2536 HandleCondition(comp);
2537 }
2538
VisitNotEqual(HNotEqual * comp)2539 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2540 HandleCondition(comp);
2541 }
2542
VisitLessThan(HLessThan * comp)2543 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2544 HandleCondition(comp);
2545 }
2546
VisitLessThan(HLessThan * comp)2547 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2548 HandleCondition(comp);
2549 }
2550
VisitLessThanOrEqual(HLessThanOrEqual * comp)2551 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2552 HandleCondition(comp);
2553 }
2554
VisitLessThanOrEqual(HLessThanOrEqual * comp)2555 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2556 HandleCondition(comp);
2557 }
2558
VisitGreaterThan(HGreaterThan * comp)2559 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2560 HandleCondition(comp);
2561 }
2562
VisitGreaterThan(HGreaterThan * comp)2563 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2564 HandleCondition(comp);
2565 }
2566
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2567 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2568 HandleCondition(comp);
2569 }
2570
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2571 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2572 HandleCondition(comp);
2573 }
2574
VisitBelow(HBelow * comp)2575 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2576 HandleCondition(comp);
2577 }
2578
VisitBelow(HBelow * comp)2579 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2580 HandleCondition(comp);
2581 }
2582
VisitBelowOrEqual(HBelowOrEqual * comp)2583 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2584 HandleCondition(comp);
2585 }
2586
VisitBelowOrEqual(HBelowOrEqual * comp)2587 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2588 HandleCondition(comp);
2589 }
2590
VisitAbove(HAbove * comp)2591 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2592 HandleCondition(comp);
2593 }
2594
VisitAbove(HAbove * comp)2595 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2596 HandleCondition(comp);
2597 }
2598
VisitAboveOrEqual(HAboveOrEqual * comp)2599 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2600 HandleCondition(comp);
2601 }
2602
VisitAboveOrEqual(HAboveOrEqual * comp)2603 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2604 HandleCondition(comp);
2605 }
2606
VisitIntConstant(HIntConstant * constant)2607 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2608 LocationSummary* locations =
2609 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2610 locations->SetOut(Location::ConstantLocation(constant));
2611 }
2612
VisitIntConstant(HIntConstant * constant)2613 void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2614 // Will be generated at use site.
2615 }
2616
VisitNullConstant(HNullConstant * constant)2617 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2618 LocationSummary* locations =
2619 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2620 locations->SetOut(Location::ConstantLocation(constant));
2621 }
2622
VisitNullConstant(HNullConstant * constant)2623 void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2624 // Will be generated at use site.
2625 }
2626
VisitLongConstant(HLongConstant * constant)2627 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2628 LocationSummary* locations =
2629 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2630 locations->SetOut(Location::ConstantLocation(constant));
2631 }
2632
VisitLongConstant(HLongConstant * constant)2633 void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2634 // Will be generated at use site.
2635 }
2636
VisitFloatConstant(HFloatConstant * constant)2637 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2638 LocationSummary* locations =
2639 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2640 locations->SetOut(Location::ConstantLocation(constant));
2641 }
2642
VisitFloatConstant(HFloatConstant * constant)2643 void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2644 // Will be generated at use site.
2645 }
2646
VisitDoubleConstant(HDoubleConstant * constant)2647 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2648 LocationSummary* locations =
2649 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2650 locations->SetOut(Location::ConstantLocation(constant));
2651 }
2652
VisitDoubleConstant(HDoubleConstant * constant)2653 void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
2654 // Will be generated at use site.
2655 }
2656
VisitConstructorFence(HConstructorFence * constructor_fence)2657 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2658 constructor_fence->SetLocations(nullptr);
2659 }
2660
VisitConstructorFence(HConstructorFence * constructor_fence)2661 void InstructionCodeGeneratorX86::VisitConstructorFence(
2662 [[maybe_unused]] HConstructorFence* constructor_fence) {
2663 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2664 }
2665
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2666 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2667 memory_barrier->SetLocations(nullptr);
2668 }
2669
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2670 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2671 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2672 }
2673
VisitReturnVoid(HReturnVoid * ret)2674 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2675 ret->SetLocations(nullptr);
2676 }
2677
VisitReturnVoid(HReturnVoid * ret)2678 void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2679 codegen_->GenerateFrameExit();
2680 }
2681
VisitReturn(HReturn * ret)2682 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2683 LocationSummary* locations =
2684 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2685 SetInForReturnValue(ret, locations);
2686 }
2687
2688 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2689 switch (ret->InputAt(0)->GetType()) {
2690 case DataType::Type::kReference:
2691 case DataType::Type::kBool:
2692 case DataType::Type::kUint8:
2693 case DataType::Type::kInt8:
2694 case DataType::Type::kUint16:
2695 case DataType::Type::kInt16:
2696 case DataType::Type::kInt32:
2697 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2698 break;
2699
2700 case DataType::Type::kInt64:
2701 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2702 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2703 break;
2704
2705 case DataType::Type::kFloat32:
2706 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2707 if (GetGraph()->IsCompilingOsr()) {
2708 // To simplify callers of an OSR method, we put the return value in both
2709 // floating point and core registers.
2710 __ movd(EAX, XMM0);
2711 }
2712 break;
2713
2714 case DataType::Type::kFloat64:
2715 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2716 if (GetGraph()->IsCompilingOsr()) {
2717 // To simplify callers of an OSR method, we put the return value in both
2718 // floating point and core registers.
2719 __ movd(EAX, XMM0);
2720 // Use XMM1 as temporary register to not clobber XMM0.
2721 __ movaps(XMM1, XMM0);
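// The PSRLQ below shifts the double's bit pattern right by 32 so that its
// upper half lands in the low lane, and the MOVD copies it to EDX; together
// with EAX set above, this mirrors the EDX:EAX core-register convention used
// for long return values.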
2722 __ psrlq(XMM1, Immediate(32));
2723 __ movd(EDX, XMM1);
2724 }
2725 break;
2726
2727 default:
2728 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2729 }
2730 codegen_->GenerateFrameExit();
2731 }
2732
2733 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2734 // The trampoline uses the same calling convention as the dex calling convention,
2735 // except that, instead of being loaded with the target Method*, arg0/r0 will contain
2736 // the method_idx.
2737 HandleInvoke(invoke);
2738 }
2739
2740 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2741 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2742 }
2743
2744 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2745 // Explicit clinit checks triggered by static invokes must have been pruned by
2746 // art::PrepareForRegisterAllocation.
2747 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2748
2749 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2750 if (intrinsic.TryDispatch(invoke)) {
2751 if (invoke->GetLocations()->CanCall() &&
2752 invoke->HasPcRelativeMethodLoadKind() &&
2753 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2754 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2755 }
2756 return;
2757 }
2758
2759 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2760 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2761 /*for_register_allocation=*/ true);
2762 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2763 } else {
2764 HandleInvoke(invoke);
2765 }
2766
2767 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2768 if (invoke->HasPcRelativeMethodLoadKind()) {
2769 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2770 }
2771 }
2772
2773 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2774 if (invoke->GetLocations()->Intrinsified()) {
2775 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2776 intrinsic.Dispatch(invoke);
2777 return true;
2778 }
2779 return false;
2780 }
2781
2782 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2783 // Explicit clinit checks triggered by static invokes must have been pruned by
2784 // art::PrepareForRegisterAllocation.
2785 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2786
2787 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2788 return;
2789 }
2790
2791 LocationSummary* locations = invoke->GetLocations();
2792 codegen_->GenerateStaticOrDirectCall(
2793 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2794 }
2795
2796 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2797 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2798 if (intrinsic.TryDispatch(invoke)) {
2799 return;
2800 }
2801
2802 HandleInvoke(invoke);
2803
2804 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2805 // Add one temporary for inline cache update.
2806 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2807 }
2808 }
2809
2810 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2811 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2812 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2813 }
2814
2815 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2816 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2817 return;
2818 }
2819
2820 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2821 DCHECK(!codegen_->IsLeafMethod());
2822 }
2823
2824 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2825 // This call to HandleInvoke allocates a temporary (core) register
2826 // which is also used to transfer the hidden argument from a core
2827 // register to the FP register (XMM7) below.
2828 HandleInvoke(invoke);
2829 // Add the hidden argument.
2830 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2831
2832 if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
2833 // Add one temporary for inline cache update.
2834 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2835 }
2836
2837 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2838 if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2839 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2840 }
2841
2842 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2843 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2844 Location::RequiresRegister());
2845 }
2846 }
2847
2848 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2849 DCHECK_EQ(EAX, klass);
2850 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
2851 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2852 DCHECK(info != nullptr);
2853 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
2854 info, GetCompilerOptions(), instruction->AsInvoke());
2855 if (cache != nullptr) {
2856 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2857 if (kIsDebugBuild) {
2858 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2859 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2860 }
2861 Register temp = EBP;
2862 NearLabel done;
2863 __ movl(temp, Immediate(address));
2864 // Fast path for a monomorphic cache.
2865 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2866 __ j(kEqual, &done);
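// Cache miss (or cache not yet monomorphic): call the runtime entrypoint so it
// can record `klass` in the inline cache before falling through.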
2867 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2868 __ Bind(&done);
2869 } else {
2870 // This is unexpected, but we don't guarantee stable compilation across
2871 // JIT runs so just warn about it.
2872 ScopedObjectAccess soa(Thread::Current());
2873 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
2874 }
2875 }
2876 }
2877
2878 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2879 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2880 LocationSummary* locations = invoke->GetLocations();
2881 Register temp = locations->GetTemp(0).AsRegister<Register>();
2882 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2883 Location receiver = locations->InAt(0);
2884 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2885
2886 // Set the hidden argument. It is safe to do this here, as XMM7
2887 // won't be modified thereafter, before the `call` instruction.
2888 DCHECK_EQ(XMM7, hidden_reg);
2889 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2890 __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2891 } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2892 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2893 __ movd(hidden_reg, temp);
2894 }
2895
2896 if (receiver.IsStackSlot()) {
2897 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2898 // /* HeapReference<Class> */ temp = temp->klass_
2899 __ movl(temp, Address(temp, class_offset));
2900 } else {
2901 // /* HeapReference<Class> */ temp = receiver->klass_
2902 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2903 }
2904 codegen_->MaybeRecordImplicitNullCheck(invoke);
2905 // Instead of simply (possibly) unpoisoning `temp` here, we should
2906 // emit a read barrier for the previous class reference load.
2907 // However this is not required in practice, as this is an
2908 // intermediate/temporary reference and because the current
2909 // concurrent copying collector keeps the from-space memory
2910 // intact/accessible until the end of the marking phase (the
2911 // collector may not guarantee this in the future).
2912 __ MaybeUnpoisonHeapReference(temp);
2913
2914 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2915
2916 // temp = temp->GetAddressOfIMT()
2917 __ movl(temp,
2918 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2919 // temp = temp->GetImtEntryAt(method_offset);
2920 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2921 invoke->GetImtIndex(), kX86PointerSize));
2922 __ movl(temp, Address(temp, method_offset));
2923 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2924 // We pass the method from the IMT in case of a conflict. This will ensure
2925 // we go into the runtime to resolve the actual method.
2926 __ movd(hidden_reg, temp);
2927 }
2928 // call temp->GetEntryPoint();
2929 __ call(Address(temp,
2930 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2931
2932 DCHECK(!codegen_->IsLeafMethod());
2933 codegen_->RecordPcInfo(invoke);
2934 }
2935
2936 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2937 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2938 if (intrinsic.TryDispatch(invoke)) {
2939 return;
2940 }
2941 HandleInvoke(invoke);
2942 }
2943
2944 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2945 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2946 return;
2947 }
2948 codegen_->GenerateInvokePolymorphicCall(invoke);
2949 }
2950
2951 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2952 HandleInvoke(invoke);
2953 }
2954
2955 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2956 codegen_->GenerateInvokeCustomCall(invoke);
2957 }
2958
2959 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2960 LocationSummary* locations =
2961 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2962 switch (neg->GetResultType()) {
2963 case DataType::Type::kInt32:
2964 case DataType::Type::kInt64:
2965 locations->SetInAt(0, Location::RequiresRegister());
2966 locations->SetOut(Location::SameAsFirstInput());
2967 break;
2968
2969 case DataType::Type::kFloat32:
2970 locations->SetInAt(0, Location::RequiresFpuRegister());
2971 locations->SetOut(Location::SameAsFirstInput());
2972 locations->AddTemp(Location::RequiresRegister());
2973 locations->AddTemp(Location::RequiresFpuRegister());
2974 break;
2975
2976 case DataType::Type::kFloat64:
2977 locations->SetInAt(0, Location::RequiresFpuRegister());
2978 locations->SetOut(Location::SameAsFirstInput());
2979 locations->AddTemp(Location::RequiresFpuRegister());
2980 break;
2981
2982 default:
2983 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2984 }
2985 }
2986
2987 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2988 LocationSummary* locations = neg->GetLocations();
2989 Location out = locations->Out();
2990 Location in = locations->InAt(0);
2991 switch (neg->GetResultType()) {
2992 case DataType::Type::kInt32:
2993 DCHECK(in.IsRegister());
2994 DCHECK(in.Equals(out));
2995 __ negl(out.AsRegister<Register>());
2996 break;
2997
2998 case DataType::Type::kInt64:
2999 DCHECK(in.IsRegisterPair());
3000 DCHECK(in.Equals(out));
3001 __ negl(out.AsRegisterPairLow<Register>());
3002 // Negation is equivalent to subtraction from zero. Negating the low
3003 // 32-bit half sets the carry flag (CF) when that half is non-zero,
3004 // signalling that a borrow is needed; propagate the carry into the
3005 // high half (ADCL with 0) before negating it, so the high half becomes
3006 // -(high + borrow).
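// For example, negating 0x00000001'00000000 (2^32): NEGL on the low half
// leaves 0 with CF cleared (the low half was 0), ADCL adds nothing, and
// NEGL on the high half yields 0xFFFFFFFF'00000000, i.e. -2^32.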
3007 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
3008 __ negl(out.AsRegisterPairHigh<Register>());
3009 break;
3010
3011 case DataType::Type::kFloat32: {
3012 DCHECK(in.Equals(out));
3013 Register constant = locations->GetTemp(0).AsRegister<Register>();
3014 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
3015 // Implement float negation with an exclusive or with value
3016 // 0x80000000 (mask for bit 31, representing the sign of a
3017 // single-precision floating-point number).
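// For example, 1.5f has bit pattern 0x3FC00000; XOR-ing with the mask flips
// only the sign bit, giving 0xBFC00000 == -1.5f.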
3018 __ movl(constant, Immediate(INT32_C(0x80000000)));
3019 __ movd(mask, constant);
3020 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3021 break;
3022 }
3023
3024 case DataType::Type::kFloat64: {
3025 DCHECK(in.Equals(out));
3026 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3027 // Implement double negation with an exclusive or with value
3028 // 0x8000000000000000 (mask for bit 63, representing the sign of
3029 // a double-precision floating-point number).
3030 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
3031 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3032 break;
3033 }
3034
3035 default:
3036 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3037 }
3038 }
3039
3040 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
3041 LocationSummary* locations =
3042 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3043 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
3044 locations->SetInAt(0, Location::RequiresFpuRegister());
3045 locations->SetInAt(1, Location::RequiresRegister());
3046 locations->SetOut(Location::SameAsFirstInput());
3047 locations->AddTemp(Location::RequiresFpuRegister());
3048 }
3049
3050 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
3051 LocationSummary* locations = neg->GetLocations();
3052 Location out = locations->Out();
3053 DCHECK(locations->InAt(0).Equals(out));
3054
3055 Register constant_area = locations->InAt(1).AsRegister<Register>();
3056 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3057 if (neg->GetType() == DataType::Type::kFloat32) {
3058 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
3059 neg->GetBaseMethodAddress(),
3060 constant_area));
3061 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3062 } else {
3063 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
3064 neg->GetBaseMethodAddress(),
3065 constant_area));
3066 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3067 }
3068 }
3069
3070 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
3071 DataType::Type result_type = conversion->GetResultType();
3072 DataType::Type input_type = conversion->GetInputType();
3073 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3074 << input_type << " -> " << result_type;
3075
3076 // The float-to-long and double-to-long type conversions rely on a
3077 // call to the runtime.
3078 LocationSummary::CallKind call_kind =
3079 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3080 && result_type == DataType::Type::kInt64)
3081 ? LocationSummary::kCallOnMainOnly
3082 : LocationSummary::kNoCall;
3083 LocationSummary* locations =
3084 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3085
3086 switch (result_type) {
3087 case DataType::Type::kUint8:
3088 case DataType::Type::kInt8:
3089 switch (input_type) {
3090 case DataType::Type::kUint8:
3091 case DataType::Type::kInt8:
3092 case DataType::Type::kUint16:
3093 case DataType::Type::kInt16:
3094 case DataType::Type::kInt32:
3095 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
3096 // Make the output overlap to please the register allocator. This greatly simplifies
3097 // the validation of the linear scan implementation.
3098 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3099 break;
3100 case DataType::Type::kInt64: {
3101 HInstruction* input = conversion->InputAt(0);
3102 Location input_location = input->IsConstant()
3103 ? Location::ConstantLocation(input)
3104 : Location::RegisterPairLocation(EAX, EDX);
3105 locations->SetInAt(0, input_location);
3106 // Make the output overlap to please the register allocator. This greatly simplifies
3107 // the validation of the linear scan implementation.
3108 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3109 break;
3110 }
3111
3112 default:
3113 LOG(FATAL) << "Unexpected type conversion from " << input_type
3114 << " to " << result_type;
3115 }
3116 break;
3117
3118 case DataType::Type::kUint16:
3119 case DataType::Type::kInt16:
3120 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3121 locations->SetInAt(0, Location::Any());
3122 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3123 break;
3124
3125 case DataType::Type::kInt32:
3126 switch (input_type) {
3127 case DataType::Type::kInt64:
3128 locations->SetInAt(0, Location::Any());
3129 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3130 break;
3131
3132 case DataType::Type::kFloat32:
3133 locations->SetInAt(0, Location::RequiresFpuRegister());
3134 locations->SetOut(Location::RequiresRegister());
3135 locations->AddTemp(Location::RequiresFpuRegister());
3136 break;
3137
3138 case DataType::Type::kFloat64:
3139 locations->SetInAt(0, Location::RequiresFpuRegister());
3140 locations->SetOut(Location::RequiresRegister());
3141 locations->AddTemp(Location::RequiresFpuRegister());
3142 break;
3143
3144 default:
3145 LOG(FATAL) << "Unexpected type conversion from " << input_type
3146 << " to " << result_type;
3147 }
3148 break;
3149
3150 case DataType::Type::kInt64:
3151 switch (input_type) {
3152 case DataType::Type::kBool:
3153 case DataType::Type::kUint8:
3154 case DataType::Type::kInt8:
3155 case DataType::Type::kUint16:
3156 case DataType::Type::kInt16:
3157 case DataType::Type::kInt32:
3158 locations->SetInAt(0, Location::RegisterLocation(EAX));
3159 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3160 break;
3161
3162 case DataType::Type::kFloat32:
3163 case DataType::Type::kFloat64: {
3164 InvokeRuntimeCallingConvention calling_convention;
3165 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3166 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3167
3168 // The runtime helper puts the result in EAX, EDX.
3169 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3170 }
3171 break;
3172
3173 default:
3174 LOG(FATAL) << "Unexpected type conversion from " << input_type
3175 << " to " << result_type;
3176 }
3177 break;
3178
3179 case DataType::Type::kFloat32:
3180 switch (input_type) {
3181 case DataType::Type::kBool:
3182 case DataType::Type::kUint8:
3183 case DataType::Type::kInt8:
3184 case DataType::Type::kUint16:
3185 case DataType::Type::kInt16:
3186 case DataType::Type::kInt32:
3187 locations->SetInAt(0, Location::RequiresRegister());
3188 locations->SetOut(Location::RequiresFpuRegister());
3189 break;
3190
3191 case DataType::Type::kInt64:
3192 locations->SetInAt(0, Location::Any());
3193 locations->SetOut(Location::Any());
3194 break;
3195
3196 case DataType::Type::kFloat64:
3197 locations->SetInAt(0, Location::RequiresFpuRegister());
3198 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3199 break;
3200
3201 default:
3202 LOG(FATAL) << "Unexpected type conversion from " << input_type
3203 << " to " << result_type;
3204 }
3205 break;
3206
3207 case DataType::Type::kFloat64:
3208 switch (input_type) {
3209 case DataType::Type::kBool:
3210 case DataType::Type::kUint8:
3211 case DataType::Type::kInt8:
3212 case DataType::Type::kUint16:
3213 case DataType::Type::kInt16:
3214 case DataType::Type::kInt32:
3215 locations->SetInAt(0, Location::RequiresRegister());
3216 locations->SetOut(Location::RequiresFpuRegister());
3217 break;
3218
3219 case DataType::Type::kInt64:
3220 locations->SetInAt(0, Location::Any());
3221 locations->SetOut(Location::Any());
3222 break;
3223
3224 case DataType::Type::kFloat32:
3225 locations->SetInAt(0, Location::RequiresFpuRegister());
3226 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3227 break;
3228
3229 default:
3230 LOG(FATAL) << "Unexpected type conversion from " << input_type
3231 << " to " << result_type;
3232 }
3233 break;
3234
3235 default:
3236 LOG(FATAL) << "Unexpected type conversion from " << input_type
3237 << " to " << result_type;
3238 }
3239 }
3240
3241 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3242 LocationSummary* locations = conversion->GetLocations();
3243 Location out = locations->Out();
3244 Location in = locations->InAt(0);
3245 DataType::Type result_type = conversion->GetResultType();
3246 DataType::Type input_type = conversion->GetInputType();
3247 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3248 << input_type << " -> " << result_type;
3249 switch (result_type) {
3250 case DataType::Type::kUint8:
3251 switch (input_type) {
3252 case DataType::Type::kInt8:
3253 case DataType::Type::kUint16:
3254 case DataType::Type::kInt16:
3255 case DataType::Type::kInt32:
3256 if (in.IsRegister()) {
3257 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3258 } else {
3259 DCHECK(in.GetConstant()->IsIntConstant());
3260 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3261 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3262 }
3263 break;
3264 case DataType::Type::kInt64:
3265 if (in.IsRegisterPair()) {
3266 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3267 } else {
3268 DCHECK(in.GetConstant()->IsLongConstant());
3269 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3270 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3271 }
3272 break;
3273
3274 default:
3275 LOG(FATAL) << "Unexpected type conversion from " << input_type
3276 << " to " << result_type;
3277 }
3278 break;
3279
3280 case DataType::Type::kInt8:
3281 switch (input_type) {
3282 case DataType::Type::kUint8:
3283 case DataType::Type::kUint16:
3284 case DataType::Type::kInt16:
3285 case DataType::Type::kInt32:
3286 if (in.IsRegister()) {
3287 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3288 } else {
3289 DCHECK(in.GetConstant()->IsIntConstant());
3290 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3291 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3292 }
3293 break;
3294 case DataType::Type::kInt64:
3295 if (in.IsRegisterPair()) {
3296 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3297 } else {
3298 DCHECK(in.GetConstant()->IsLongConstant());
3299 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3300 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3301 }
3302 break;
3303
3304 default:
3305 LOG(FATAL) << "Unexpected type conversion from " << input_type
3306 << " to " << result_type;
3307 }
3308 break;
3309
3310 case DataType::Type::kUint16:
3311 switch (input_type) {
3312 case DataType::Type::kInt8:
3313 case DataType::Type::kInt16:
3314 case DataType::Type::kInt32:
3315 if (in.IsRegister()) {
3316 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3317 } else if (in.IsStackSlot()) {
3318 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3319 } else {
3320 DCHECK(in.GetConstant()->IsIntConstant());
3321 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3322 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3323 }
3324 break;
3325 case DataType::Type::kInt64:
3326 if (in.IsRegisterPair()) {
3327 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3328 } else if (in.IsDoubleStackSlot()) {
3329 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3330 } else {
3331 DCHECK(in.GetConstant()->IsLongConstant());
3332 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3333 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3334 }
3335 break;
3336
3337 default:
3338 LOG(FATAL) << "Unexpected type conversion from " << input_type
3339 << " to " << result_type;
3340 }
3341 break;
3342
3343 case DataType::Type::kInt16:
3344 switch (input_type) {
3345 case DataType::Type::kUint16:
3346 case DataType::Type::kInt32:
3347 if (in.IsRegister()) {
3348 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3349 } else if (in.IsStackSlot()) {
3350 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3351 } else {
3352 DCHECK(in.GetConstant()->IsIntConstant());
3353 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3354 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3355 }
3356 break;
3357 case DataType::Type::kInt64:
3358 if (in.IsRegisterPair()) {
3359 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3360 } else if (in.IsDoubleStackSlot()) {
3361 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3362 } else {
3363 DCHECK(in.GetConstant()->IsLongConstant());
3364 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3365 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3366 }
3367 break;
3368
3369 default:
3370 LOG(FATAL) << "Unexpected type conversion from " << input_type
3371 << " to " << result_type;
3372 }
3373 break;
3374
3375 case DataType::Type::kInt32:
3376 switch (input_type) {
3377 case DataType::Type::kInt64:
3378 if (in.IsRegisterPair()) {
3379 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3380 } else if (in.IsDoubleStackSlot()) {
3381 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3382 } else {
3383 DCHECK(in.IsConstant());
3384 DCHECK(in.GetConstant()->IsLongConstant());
3385 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3386 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3387 }
3388 break;
3389
3390 case DataType::Type::kFloat32: {
3391 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3392 Register output = out.AsRegister<Register>();
3393 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3394 NearLabel done, nan;
3395
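// Java float->int semantics: NaN converts to 0 and anything at or above
// (float)Integer.MAX_VALUE clamps to Integer.MAX_VALUE. Inputs below
// Integer.MIN_VALUE need no extra handling because CVTTSS2SI already returns
// 0x80000000 (Integer.MIN_VALUE) for out-of-range values. The double->int
// case below follows the same pattern.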
3396 __ movl(output, Immediate(kPrimIntMax));
3397 // temp = int-to-float(output)
3398 __ cvtsi2ss(temp, output);
3399 // if input >= temp goto done
3400 __ comiss(input, temp);
3401 __ j(kAboveEqual, &done);
3402 // if input == NaN goto nan
3403 __ j(kUnordered, &nan);
3404 // output = float-to-int-truncate(input)
3405 __ cvttss2si(output, input);
3406 __ jmp(&done);
3407 __ Bind(&nan);
3408 // output = 0
3409 __ xorl(output, output);
3410 __ Bind(&done);
3411 break;
3412 }
3413
3414 case DataType::Type::kFloat64: {
3415 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3416 Register output = out.AsRegister<Register>();
3417 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3418 NearLabel done, nan;
3419
3420 __ movl(output, Immediate(kPrimIntMax));
3421 // temp = int-to-double(output)
3422 __ cvtsi2sd(temp, output);
3423 // if input >= temp goto done
3424 __ comisd(input, temp);
3425 __ j(kAboveEqual, &done);
3426 // if input == NaN goto nan
3427 __ j(kUnordered, &nan);
3428 // output = double-to-int-truncate(input)
3429 __ cvttsd2si(output, input);
3430 __ jmp(&done);
3431 __ Bind(&nan);
3432 // output = 0
3433 __ xorl(output, output);
3434 __ Bind(&done);
3435 break;
3436 }
3437
3438 default:
3439 LOG(FATAL) << "Unexpected type conversion from " << input_type
3440 << " to " << result_type;
3441 }
3442 break;
3443
3444 case DataType::Type::kInt64:
3445 switch (input_type) {
3446 case DataType::Type::kBool:
3447 case DataType::Type::kUint8:
3448 case DataType::Type::kInt8:
3449 case DataType::Type::kUint16:
3450 case DataType::Type::kInt16:
3451 case DataType::Type::kInt32:
3452 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3453 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3454 DCHECK_EQ(in.AsRegister<Register>(), EAX);
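// CDQ sign-extends EAX into EDX, producing the 64-bit result in the EDX:EAX
// register pair expected for long values.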
3455 __ cdq();
3456 break;
3457
3458 case DataType::Type::kFloat32:
3459 codegen_->InvokeRuntime(kQuickF2l, conversion);
3460 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3461 break;
3462
3463 case DataType::Type::kFloat64:
3464 codegen_->InvokeRuntime(kQuickD2l, conversion);
3465 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3466 break;
3467
3468 default:
3469 LOG(FATAL) << "Unexpected type conversion from " << input_type
3470 << " to " << result_type;
3471 }
3472 break;
3473
3474 case DataType::Type::kFloat32:
3475 switch (input_type) {
3476 case DataType::Type::kBool:
3477 case DataType::Type::kUint8:
3478 case DataType::Type::kInt8:
3479 case DataType::Type::kUint16:
3480 case DataType::Type::kInt16:
3481 case DataType::Type::kInt32:
3482 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3483 break;
3484
3485 case DataType::Type::kInt64: {
3486 size_t adjustment = 0;
3487
3488 // Create stack space for the call to
3489 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3490 // TODO: enhance register allocator to ask for stack temporaries.
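// Note: the x87 FP stack is used because 32-bit SSE has no direct
// 64-bit-integer-to-float conversion; the 64-bit integer load (fildl) plus
// the single-precision store (fstps) performs the int64 -> float conversion
// via the FP stack.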
3491 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3492 adjustment = DataType::Size(DataType::Type::kInt64);
3493 codegen_->IncreaseFrame(adjustment);
3494 }
3495
3496 // Load the value to the FP stack, using temporaries if needed.
3497 PushOntoFPStack(in, 0, adjustment, false, true);
3498
3499 if (out.IsStackSlot()) {
3500 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3501 } else {
3502 __ fstps(Address(ESP, 0));
3503 Location stack_temp = Location::StackSlot(0);
3504 codegen_->Move32(out, stack_temp);
3505 }
3506
3507 // Remove the temporary stack space we allocated.
3508 if (adjustment != 0) {
3509 codegen_->DecreaseFrame(adjustment);
3510 }
3511 break;
3512 }
3513
3514 case DataType::Type::kFloat64:
3515 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3516 break;
3517
3518 default:
3519 LOG(FATAL) << "Unexpected type conversion from " << input_type
3520 << " to " << result_type;
3521 }
3522 break;
3523
3524 case DataType::Type::kFloat64:
3525 switch (input_type) {
3526 case DataType::Type::kBool:
3527 case DataType::Type::kUint8:
3528 case DataType::Type::kInt8:
3529 case DataType::Type::kUint16:
3530 case DataType::Type::kInt16:
3531 case DataType::Type::kInt32:
3532 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3533 break;
3534
3535 case DataType::Type::kInt64: {
3536 size_t adjustment = 0;
3537
3538 // Create stack space for the call to
3539 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3540 // TODO: enhance register allocator to ask for stack temporaries.
3541 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3542 adjustment = DataType::Size(DataType::Type::kInt64);
3543 codegen_->IncreaseFrame(adjustment);
3544 }
3545
3546 // Load the value to the FP stack, using temporaries if needed.
3547 PushOntoFPStack(in, 0, adjustment, false, true);
3548
3549 if (out.IsDoubleStackSlot()) {
3550 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3551 } else {
3552 __ fstpl(Address(ESP, 0));
3553 Location stack_temp = Location::DoubleStackSlot(0);
3554 codegen_->Move64(out, stack_temp);
3555 }
3556
3557 // Remove the temporary stack space we allocated.
3558 if (adjustment != 0) {
3559 codegen_->DecreaseFrame(adjustment);
3560 }
3561 break;
3562 }
3563
3564 case DataType::Type::kFloat32:
3565 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3566 break;
3567
3568 default:
3569 LOG(FATAL) << "Unexpected type conversion from " << input_type
3570 << " to " << result_type;
3571 }
3572 break;
3573
3574 default:
3575 LOG(FATAL) << "Unexpected type conversion from " << input_type
3576 << " to " << result_type;
3577 }
3578 }
3579
3580 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3581 LocationSummary* locations =
3582 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3583 switch (add->GetResultType()) {
3584 case DataType::Type::kInt32: {
3585 locations->SetInAt(0, Location::RequiresRegister());
3586 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3587 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3588 break;
3589 }
3590
3591 case DataType::Type::kInt64: {
3592 locations->SetInAt(0, Location::RequiresRegister());
3593 locations->SetInAt(1, Location::Any());
3594 locations->SetOut(Location::SameAsFirstInput());
3595 break;
3596 }
3597
3598 case DataType::Type::kFloat32:
3599 case DataType::Type::kFloat64: {
3600 locations->SetInAt(0, Location::RequiresFpuRegister());
3601 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3602 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3603 } else if (add->InputAt(1)->IsConstant()) {
3604 locations->SetInAt(1, Location::RequiresFpuRegister());
3605 } else {
3606 locations->SetInAt(1, Location::Any());
3607 }
3608 locations->SetOut(Location::SameAsFirstInput());
3609 break;
3610 }
3611
3612 default:
3613 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3614 UNREACHABLE();
3615 }
3616 }
3617
3618 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3619 LocationSummary* locations = add->GetLocations();
3620 Location first = locations->InAt(0);
3621 Location second = locations->InAt(1);
3622 Location out = locations->Out();
3623
3624 switch (add->GetResultType()) {
3625 case DataType::Type::kInt32: {
3626 if (second.IsRegister()) {
3627 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3628 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3629 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3630 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3631 } else {
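// Neither input already occupies the output register, so use LEA as a
// non-destructive three-operand add.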
3632 __ leal(out.AsRegister<Register>(), Address(
3633 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3634 }
3635 } else if (second.IsConstant()) {
3636 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3637 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3638 __ addl(out.AsRegister<Register>(), Immediate(value));
3639 } else {
3640 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3641 }
3642 } else {
3643 DCHECK(first.Equals(locations->Out()));
3644 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3645 }
3646 break;
3647 }
3648
3649 case DataType::Type::kInt64: {
3650 if (second.IsRegisterPair()) {
3651 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3652 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3653 } else if (second.IsDoubleStackSlot()) {
3654 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3655 __ adcl(first.AsRegisterPairHigh<Register>(),
3656 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3657 } else {
3658 DCHECK(second.IsConstant()) << second;
3659 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3660 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3661 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3662 }
3663 break;
3664 }
3665
3666 case DataType::Type::kFloat32: {
3667 if (second.IsFpuRegister()) {
3668 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3669 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3670 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3671 DCHECK(const_area->IsEmittedAtUseSite());
3672 __ addss(first.AsFpuRegister<XmmRegister>(),
3673 codegen_->LiteralFloatAddress(
3674 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3675 const_area->GetBaseMethodAddress(),
3676 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3677 } else {
3678 DCHECK(second.IsStackSlot());
3679 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3680 }
3681 break;
3682 }
3683
3684 case DataType::Type::kFloat64: {
3685 if (second.IsFpuRegister()) {
3686 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3687 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3688 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3689 DCHECK(const_area->IsEmittedAtUseSite());
3690 __ addsd(first.AsFpuRegister<XmmRegister>(),
3691 codegen_->LiteralDoubleAddress(
3692 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3693 const_area->GetBaseMethodAddress(),
3694 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3695 } else {
3696 DCHECK(second.IsDoubleStackSlot());
3697 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3698 }
3699 break;
3700 }
3701
3702 default:
3703 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3704 }
3705 }
3706
3707 void LocationsBuilderX86::VisitSub(HSub* sub) {
3708 LocationSummary* locations =
3709 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3710 switch (sub->GetResultType()) {
3711 case DataType::Type::kInt32:
3712 case DataType::Type::kInt64: {
3713 locations->SetInAt(0, Location::RequiresRegister());
3714 locations->SetInAt(1, Location::Any());
3715 locations->SetOut(Location::SameAsFirstInput());
3716 break;
3717 }
3718 case DataType::Type::kFloat32:
3719 case DataType::Type::kFloat64: {
3720 locations->SetInAt(0, Location::RequiresFpuRegister());
3721 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3722 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3723 } else if (sub->InputAt(1)->IsConstant()) {
3724 locations->SetInAt(1, Location::RequiresFpuRegister());
3725 } else {
3726 locations->SetInAt(1, Location::Any());
3727 }
3728 locations->SetOut(Location::SameAsFirstInput());
3729 break;
3730 }
3731
3732 default:
3733 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3734 }
3735 }
3736
3737 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3738 LocationSummary* locations = sub->GetLocations();
3739 Location first = locations->InAt(0);
3740 Location second = locations->InAt(1);
3741 DCHECK(first.Equals(locations->Out()));
3742 switch (sub->GetResultType()) {
3743 case DataType::Type::kInt32: {
3744 if (second.IsRegister()) {
3745 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3746 } else if (second.IsConstant()) {
3747 __ subl(first.AsRegister<Register>(),
3748 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3749 } else {
3750 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3751 }
3752 break;
3753 }
3754
3755 case DataType::Type::kInt64: {
3756 if (second.IsRegisterPair()) {
3757 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3758 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3759 } else if (second.IsDoubleStackSlot()) {
3760 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3761 __ sbbl(first.AsRegisterPairHigh<Register>(),
3762 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3763 } else {
3764 DCHECK(second.IsConstant()) << second;
3765 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3766 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3767 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3768 }
3769 break;
3770 }
3771
3772 case DataType::Type::kFloat32: {
3773 if (second.IsFpuRegister()) {
3774 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3775 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3776 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3777 DCHECK(const_area->IsEmittedAtUseSite());
3778 __ subss(first.AsFpuRegister<XmmRegister>(),
3779 codegen_->LiteralFloatAddress(
3780 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3781 const_area->GetBaseMethodAddress(),
3782 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3783 } else {
3784 DCHECK(second.IsStackSlot());
3785 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3786 }
3787 break;
3788 }
3789
3790 case DataType::Type::kFloat64: {
3791 if (second.IsFpuRegister()) {
3792 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3793 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3794 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3795 DCHECK(const_area->IsEmittedAtUseSite());
3796 __ subsd(first.AsFpuRegister<XmmRegister>(),
3797 codegen_->LiteralDoubleAddress(
3798 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3799 const_area->GetBaseMethodAddress(),
3800 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3801 } else {
3802 DCHECK(second.IsDoubleStackSlot());
3803 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3804 }
3805 break;
3806 }
3807
3808 default:
3809 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3810 }
3811 }
3812
3813 void LocationsBuilderX86::VisitMul(HMul* mul) {
3814 LocationSummary* locations =
3815 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3816 switch (mul->GetResultType()) {
3817 case DataType::Type::kInt32:
3818 locations->SetInAt(0, Location::RequiresRegister());
3819 locations->SetInAt(1, Location::Any());
3820 if (mul->InputAt(1)->IsIntConstant()) {
3821 // Can use 3 operand multiply.
3822 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3823 } else {
3824 locations->SetOut(Location::SameAsFirstInput());
3825 }
3826 break;
3827 case DataType::Type::kInt64: {
3828 locations->SetInAt(0, Location::RequiresRegister());
3829 locations->SetInAt(1, Location::Any());
3830 locations->SetOut(Location::SameAsFirstInput());
3831 // Temporaries needed for the 32x32->64-bit multiply: mull writes its result to EDX:EAX.
3832 locations->AddTemp(Location::RegisterLocation(EAX));
3833 locations->AddTemp(Location::RegisterLocation(EDX));
3834 break;
3835 }
3836 case DataType::Type::kFloat32:
3837 case DataType::Type::kFloat64: {
3838 locations->SetInAt(0, Location::RequiresFpuRegister());
3839 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3840 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3841 } else if (mul->InputAt(1)->IsConstant()) {
3842 locations->SetInAt(1, Location::RequiresFpuRegister());
3843 } else {
3844 locations->SetInAt(1, Location::Any());
3845 }
3846 locations->SetOut(Location::SameAsFirstInput());
3847 break;
3848 }
3849
3850 default:
3851 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3852 }
3853 }
3854
3855 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3856 LocationSummary* locations = mul->GetLocations();
3857 Location first = locations->InAt(0);
3858 Location second = locations->InAt(1);
3859 Location out = locations->Out();
3860
3861 switch (mul->GetResultType()) {
3862 case DataType::Type::kInt32:
3863 // The constant may have ended up in a register, so test explicitly to avoid
3864 // problems where the output may not be the same as the first operand.
3865 if (mul->InputAt(1)->IsIntConstant()) {
3866 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3867 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3868 } else if (second.IsRegister()) {
3869 DCHECK(first.Equals(out));
3870 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3871 } else {
3872 DCHECK(second.IsStackSlot());
3873 DCHECK(first.Equals(out));
3874 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3875 }
3876 break;
3877
3878 case DataType::Type::kInt64: {
3879 Register in1_hi = first.AsRegisterPairHigh<Register>();
3880 Register in1_lo = first.AsRegisterPairLow<Register>();
3881 Register eax = locations->GetTemp(0).AsRegister<Register>();
3882 Register edx = locations->GetTemp(1).AsRegister<Register>();
3883
3884 DCHECK_EQ(EAX, eax);
3885 DCHECK_EQ(EDX, edx);
3886
3887 // input: in1 - 64 bits, in2 - 64 bits.
3888 // output: in1
3889 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo) * 2^32 + in1.lo * in2.lo
3890 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3891 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3892 if (second.IsConstant()) {
3893 DCHECK(second.GetConstant()->IsLongConstant());
3894
3895 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3896 int32_t low_value = Low32Bits(value);
3897 int32_t high_value = High32Bits(value);
3898 Immediate low(low_value);
3899 Immediate high(high_value);
3900
3901 __ movl(eax, high);
3902 // eax <- in1.lo * in2.hi
3903 __ imull(eax, in1_lo);
3904 // in1.hi <- in1.hi * in2.lo
3905 __ imull(in1_hi, low);
3906 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3907 __ addl(in1_hi, eax);
3908 // move in2_lo to eax to prepare for double precision
3909 __ movl(eax, low);
3910 // edx:eax <- in1.lo * in2.lo
3911 __ mull(in1_lo);
3912 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3913 __ addl(in1_hi, edx);
3914 // in1.lo <- (in1.lo * in2.lo)[31:0];
3915 __ movl(in1_lo, eax);
3916 } else if (second.IsRegisterPair()) {
3917 Register in2_hi = second.AsRegisterPairHigh<Register>();
3918 Register in2_lo = second.AsRegisterPairLow<Register>();
3919
3920 __ movl(eax, in2_hi);
3921 // eax <- in1.lo * in2.hi
3922 __ imull(eax, in1_lo);
3923 // in1.hi <- in1.hi * in2.lo
3924 __ imull(in1_hi, in2_lo);
3925 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3926 __ addl(in1_hi, eax);
3927 // move in1_lo to eax to prepare for double precision
3928 __ movl(eax, in1_lo);
3929 // edx:eax <- in1.lo * in2.lo
3930 __ mull(in2_lo);
3931 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3932 __ addl(in1_hi, edx);
3933 // in1.lo <- (in1.lo * in2.lo)[31:0];
3934 __ movl(in1_lo, eax);
3935 } else {
3936 DCHECK(second.IsDoubleStackSlot()) << second;
3937 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3938 Address in2_lo(ESP, second.GetStackIndex());
3939
3940 __ movl(eax, in2_hi);
3941 // eax <- in1.lo * in2.hi
3942 __ imull(eax, in1_lo);
3943 // in1.hi <- in1.hi * in2.lo
3944 __ imull(in1_hi, in2_lo);
3945 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3946 __ addl(in1_hi, eax);
3947 // move in1_lo to eax to prepare for double precision
3948 __ movl(eax, in1_lo);
3949 // edx:eax <- in1.lo * in2.lo
3950 __ mull(in2_lo);
3951 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3952 __ addl(in1_hi, edx);
3953 // in1.lo <- (in1.lo * in2.lo)[31:0];
3954 __ movl(in1_lo, eax);
3955 }
3956
3957 break;
3958 }
3959
3960 case DataType::Type::kFloat32: {
3961 DCHECK(first.Equals(locations->Out()));
3962 if (second.IsFpuRegister()) {
3963 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3964 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3965 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3966 DCHECK(const_area->IsEmittedAtUseSite());
3967 __ mulss(first.AsFpuRegister<XmmRegister>(),
3968 codegen_->LiteralFloatAddress(
3969 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3970 const_area->GetBaseMethodAddress(),
3971 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3972 } else {
3973 DCHECK(second.IsStackSlot());
3974 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3975 }
3976 break;
3977 }
3978
3979 case DataType::Type::kFloat64: {
3980 DCHECK(first.Equals(locations->Out()));
3981 if (second.IsFpuRegister()) {
3982 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3983 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3984 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3985 DCHECK(const_area->IsEmittedAtUseSite());
3986 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3987 codegen_->LiteralDoubleAddress(
3988 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3989 const_area->GetBaseMethodAddress(),
3990 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3991 } else {
3992 DCHECK(second.IsDoubleStackSlot());
3993 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3994 }
3995 break;
3996 }
3997
3998 default:
3999 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4000 }
4001 }
4002
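// Loads `source` onto the x87 FP stack. `is_fp` selects a floating-point load
// (flds/fldl) rather than an integer load (filds/fildl), and `is_wide` selects
// the 64-bit variants. If `source` is not already in a stack slot, it is first
// spilled to the slot at `temp_offset`; `stack_adjustment` accounts for extra
// space the caller has pushed onto the frame.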
4003 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
4004 uint32_t temp_offset,
4005 uint32_t stack_adjustment,
4006 bool is_fp,
4007 bool is_wide) {
4008 if (source.IsStackSlot()) {
4009 DCHECK(!is_wide);
4010 if (is_fp) {
4011 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4012 } else {
4013 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
4014 }
4015 } else if (source.IsDoubleStackSlot()) {
4016 DCHECK(is_wide);
4017 if (is_fp) {
4018 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4019 } else {
4020 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
4021 }
4022 } else {
4023 // Write the value to the temporary location on the stack and load to FP stack.
4024 if (!is_wide) {
4025 Location stack_temp = Location::StackSlot(temp_offset);
4026 codegen_->Move32(stack_temp, source);
4027 if (is_fp) {
4028 __ flds(Address(ESP, temp_offset));
4029 } else {
4030 __ filds(Address(ESP, temp_offset));
4031 }
4032 } else {
4033 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4034 codegen_->Move64(stack_temp, source);
4035 if (is_fp) {
4036 __ fldl(Address(ESP, temp_offset));
4037 } else {
4038 __ fildl(Address(ESP, temp_offset));
4039 }
4040 }
4041 }
4042 }
4043
4044 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
4045 DataType::Type type = rem->GetResultType();
4046 bool is_float = type == DataType::Type::kFloat32;
4047 size_t elem_size = DataType::Size(type);
4048 LocationSummary* locations = rem->GetLocations();
4049 Location first = locations->InAt(0);
4050 Location second = locations->InAt(1);
4051 Location out = locations->Out();
4052
4053 // Create stack space for 2 elements.
4054 // TODO: enhance register allocator to ask for stack temporaries.
4055 codegen_->IncreaseFrame(2 * elem_size);
4056
4057 // Load the values to the FP stack in reverse order, using temporaries if needed.
4058 const bool is_wide = !is_float;
4059 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
4060 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
4061
4062 // Loop doing FPREM until we stabilize.
4063 NearLabel retry;
4064 __ Bind(&retry);
4065 __ fprem();
4066
4067 // Move FP status to AX.
4068 __ fstsw();
4069
4070 // And see if the argument reduction is complete. This is signaled by the
4071 // C2 FPU flag bit set to 0.
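// (kC2ConditionMask == 0x400 selects bit 10, the C2 condition flag, of the
// x87 status word that FSTSW copied into AX.)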
4072 __ andl(EAX, Immediate(kC2ConditionMask));
4073 __ j(kNotEqual, &retry);
4074
4075 // We have settled on the final value. Retrieve it into an XMM register.
4076 // Store FP top of stack to real stack.
4077 if (is_float) {
4078 __ fsts(Address(ESP, 0));
4079 } else {
4080 __ fstl(Address(ESP, 0));
4081 }
4082
4083 // Pop the 2 items from the FP stack.
4084 __ fucompp();
4085
4086 // Load the value from the stack into an XMM register.
4087 DCHECK(out.IsFpuRegister()) << out;
4088 if (is_float) {
4089 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4090 } else {
4091 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
4092 }
4093
4094 // And remove the temporary stack space we allocated.
4095 codegen_->DecreaseFrame(2 * elem_size);
4096 }
4097
4098
4099 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4100 DCHECK(instruction->IsDiv() || instruction->IsRem());
4101
4102 LocationSummary* locations = instruction->GetLocations();
4103 DCHECK(locations->InAt(1).IsConstant());
4104 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
4105
4106 Register out_register = locations->Out().AsRegister<Register>();
4107 Register input_register = locations->InAt(0).AsRegister<Register>();
4108 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4109
4110 DCHECK(imm == 1 || imm == -1);
4111
4112 if (instruction->IsRem()) {
4113 __ xorl(out_register, out_register);
4114 } else {
4115 __ movl(out_register, input_register);
4116 if (imm == -1) {
4117 __ negl(out_register);
4118 }
4119 }
4120 }
4121
4122 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4123 LocationSummary* locations = instruction->GetLocations();
4124 Location second = locations->InAt(1);
4125
4126 Register out = locations->Out().AsRegister<Register>();
4127 Register numerator = locations->InAt(0).AsRegister<Register>();
4128
4129 int32_t imm = Int64FromConstant(second.GetConstant());
4130 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4131 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4132
4133 Register tmp = locations->GetTemp(0).AsRegister<Register>();
4134 NearLabel done;
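// out = numerator & (abs_imm - 1) is the remainder for non-negative
// numerators. For a negative numerator with a non-zero masked value, the
// sign-matching remainder is that value minus abs_imm; the LEAL below
// computes it (~(abs_imm - 1) == -abs_imm) and the CMOV selects it.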
4135 __ movl(out, numerator);
4136 __ andl(out, Immediate(abs_imm-1));
4137 __ j(Condition::kZero, &done);
4138 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4139 __ testl(numerator, numerator);
4140 __ cmovl(Condition::kLess, out, tmp);
4141 __ Bind(&done);
4142 }
4143
4144 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4145 LocationSummary* locations = instruction->GetLocations();
4146
4147 Register out_register = locations->Out().AsRegister<Register>();
4148 Register input_register = locations->InAt(0).AsRegister<Register>();
4149 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4150 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4151 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4152
4153 Register num = locations->GetTemp(0).AsRegister<Register>();
4154
4155 __ leal(num, Address(input_register, abs_imm - 1));
4156 __ testl(input_register, input_register);
4157 __ cmovl(kGreaterEqual, num, input_register);
4158 int shift = CTZ(imm);
4159 __ sarl(num, Immediate(shift));
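  // The lea/cmov above add (abs_imm - 1) to negative numerators only, so the
  // arithmetic shift rounds toward zero instead of toward negative infinity.
  // For illustration, with imm == 8 and input == -3: -3 + 7 == 4 and 4 >> 3 == 0,
  // matching -3 / 8 == 0 (a plain sar would have produced -1).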
4160
4161 if (imm < 0) {
4162 __ negl(num);
4163 }
4164
4165 __ movl(out_register, num);
4166 }
4167
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4168 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4169 DCHECK(instruction->IsDiv() || instruction->IsRem());
4170
4171 LocationSummary* locations = instruction->GetLocations();
4172 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4173
4174 Register eax = locations->InAt(0).AsRegister<Register>();
4175 Register out = locations->Out().AsRegister<Register>();
4176 Register num;
4177 Register edx;
4178
4179 if (instruction->IsDiv()) {
4180 edx = locations->GetTemp(0).AsRegister<Register>();
4181 num = locations->GetTemp(1).AsRegister<Register>();
4182 } else {
4183 edx = locations->Out().AsRegister<Register>();
4184 num = locations->GetTemp(0).AsRegister<Register>();
4185 }
4186
4187 DCHECK_EQ(EAX, eax);
4188 DCHECK_EQ(EDX, edx);
4189 if (instruction->IsDiv()) {
4190 DCHECK_EQ(EAX, out);
4191 } else {
4192 DCHECK_EQ(EDX, out);
4193 }
4194
4195 int64_t magic;
4196 int shift;
4197 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
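  // The constant division uses the standard "magic number" scheme: the quotient comes
  // from the high half of magic * numerator, optionally corrected and shifted, with
  // the sign bit added at the end to round toward zero. Purely for illustration (the
  // real values come from CalculateMagicAndShiftForDivRem): a divisor of 3 typically
  // uses magic == 0x55555556 and shift == 0, so for a numerator of 7 the high half of
  // the product is 2 (== 7 / 3), and for -7 it is -3, which the final "+ sign bit"
  // step corrects to -2.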
4198
4199 // Save the numerator.
4200 __ movl(num, eax);
4201
4202 // EAX = magic
4203 __ movl(eax, Immediate(magic));
4204
4205 // EDX:EAX = magic * numerator
4206 __ imull(num);
4207
4208 if (imm > 0 && magic < 0) {
4209 // EDX += num
4210 __ addl(edx, num);
4211 } else if (imm < 0 && magic > 0) {
4212 __ subl(edx, num);
4213 }
4214
4215 // Shift if needed.
4216 if (shift != 0) {
4217 __ sarl(edx, Immediate(shift));
4218 }
4219
4220 // EDX += 1 if EDX < 0
4221 __ movl(eax, edx);
4222 __ shrl(edx, Immediate(31));
4223 __ addl(edx, eax);
4224
4225 if (instruction->IsRem()) {
4226 __ movl(eax, num);
4227 __ imull(edx, Immediate(imm));
4228 __ subl(eax, edx);
4229 __ movl(edx, eax);
4230 } else {
4231 __ movl(eax, edx);
4232 }
4233 }
4234
GenerateDivRemIntegral(HBinaryOperation * instruction)4235 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4236 DCHECK(instruction->IsDiv() || instruction->IsRem());
4237
4238 LocationSummary* locations = instruction->GetLocations();
4239 Location out = locations->Out();
4240 Location first = locations->InAt(0);
4241 Location second = locations->InAt(1);
4242 bool is_div = instruction->IsDiv();
4243
4244 switch (instruction->GetResultType()) {
4245 case DataType::Type::kInt32: {
4246 DCHECK_EQ(EAX, first.AsRegister<Register>());
4247 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4248
4249 if (second.IsConstant()) {
4250 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4251
4252 if (imm == 0) {
4253 // Do not generate anything for 0; the preceding DivZeroCheck always throws, so this is unreachable.
4254 } else if (imm == 1 || imm == -1) {
4255 DivRemOneOrMinusOne(instruction);
4256 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4257 if (is_div) {
4258 DivByPowerOfTwo(instruction->AsDiv());
4259 } else {
4260 RemByPowerOfTwo(instruction->AsRem());
4261 }
4262 } else {
4263 DCHECK(imm <= -2 || imm >= 2);
4264 GenerateDivRemWithAnyConstant(instruction);
4265 }
4266 } else {
4267 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4268 instruction, out.AsRegister<Register>(), is_div);
4269 codegen_->AddSlowPath(slow_path);
4270
4271 Register second_reg = second.AsRegister<Register>();
4272 // 0x80000000/-1 triggers an arithmetic exception!
4273 // Dividing by -1 is just negation, and -0x80000000 == 0x80000000 in two's
4274 // complement, so it's safe to just use negl instead of more complex comparisons.
4275
4276 __ cmpl(second_reg, Immediate(-1));
4277 __ j(kEqual, slow_path->GetEntryLabel());
4278
4279 // edx:eax <- sign-extended of eax
4280 __ cdq();
4281 // eax = quotient, edx = remainder
4282 __ idivl(second_reg);
4283 __ Bind(slow_path->GetExitLabel());
4284 }
4285 break;
4286 }
4287
4288 case DataType::Type::kInt64: {
4289 InvokeRuntimeCallingConvention calling_convention;
4290 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4291 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4292 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4293 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4294 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4295 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4296
4297 if (is_div) {
4298 codegen_->InvokeRuntime(kQuickLdiv, instruction);
4299 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4300 } else {
4301 codegen_->InvokeRuntime(kQuickLmod, instruction);
4302 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4303 }
4304 break;
4305 }
4306
4307 default:
4308 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4309 }
4310 }
4311
VisitDiv(HDiv * div)4312 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4313 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4314 ? LocationSummary::kCallOnMainOnly
4315 : LocationSummary::kNoCall;
4316 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4317
4318 switch (div->GetResultType()) {
4319 case DataType::Type::kInt32: {
4320 locations->SetInAt(0, Location::RegisterLocation(EAX));
4321 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4322 locations->SetOut(Location::SameAsFirstInput());
4323 // Intel uses edx:eax as the dividend.
4324 locations->AddTemp(Location::RegisterLocation(EDX));
4325 // We need to save the numerator while we tweak EAX and EDX. Since the one-operand imul
4326 // forces its result into EDX:EAX, things are simpler if we also use EAX as the
4327 // output and request another temp.
4328 if (div->InputAt(1)->IsIntConstant()) {
4329 locations->AddTemp(Location::RequiresRegister());
4330 }
4331 break;
4332 }
4333 case DataType::Type::kInt64: {
4334 InvokeRuntimeCallingConvention calling_convention;
4335 locations->SetInAt(0, Location::RegisterPairLocation(
4336 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4337 locations->SetInAt(1, Location::RegisterPairLocation(
4338 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4339 // Runtime helper puts the result in EAX, EDX.
4340 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4341 break;
4342 }
4343 case DataType::Type::kFloat32:
4344 case DataType::Type::kFloat64: {
4345 locations->SetInAt(0, Location::RequiresFpuRegister());
4346 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4347 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4348 } else if (div->InputAt(1)->IsConstant()) {
4349 locations->SetInAt(1, Location::RequiresFpuRegister());
4350 } else {
4351 locations->SetInAt(1, Location::Any());
4352 }
4353 locations->SetOut(Location::SameAsFirstInput());
4354 break;
4355 }
4356
4357 default:
4358 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4359 }
4360 }
4361
VisitDiv(HDiv * div)4362 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4363 LocationSummary* locations = div->GetLocations();
4364 Location first = locations->InAt(0);
4365 Location second = locations->InAt(1);
4366
4367 switch (div->GetResultType()) {
4368 case DataType::Type::kInt32:
4369 case DataType::Type::kInt64: {
4370 GenerateDivRemIntegral(div);
4371 break;
4372 }
4373
4374 case DataType::Type::kFloat32: {
4375 if (second.IsFpuRegister()) {
4376 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4377 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4378 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4379 DCHECK(const_area->IsEmittedAtUseSite());
4380 __ divss(first.AsFpuRegister<XmmRegister>(),
4381 codegen_->LiteralFloatAddress(
4382 const_area->GetConstant()->AsFloatConstant()->GetValue(),
4383 const_area->GetBaseMethodAddress(),
4384 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4385 } else {
4386 DCHECK(second.IsStackSlot());
4387 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4388 }
4389 break;
4390 }
4391
4392 case DataType::Type::kFloat64: {
4393 if (second.IsFpuRegister()) {
4394 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4395 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4396 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4397 DCHECK(const_area->IsEmittedAtUseSite());
4398 __ divsd(first.AsFpuRegister<XmmRegister>(),
4399 codegen_->LiteralDoubleAddress(
4400 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4401 const_area->GetBaseMethodAddress(),
4402 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4403 } else {
4404 DCHECK(second.IsDoubleStackSlot());
4405 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4406 }
4407 break;
4408 }
4409
4410 default:
4411 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4412 }
4413 }
4414
VisitRem(HRem * rem)4415 void LocationsBuilderX86::VisitRem(HRem* rem) {
4416 DataType::Type type = rem->GetResultType();
4417
4418 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4419 ? LocationSummary::kCallOnMainOnly
4420 : LocationSummary::kNoCall;
4421 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4422
4423 switch (type) {
4424 case DataType::Type::kInt32: {
4425 locations->SetInAt(0, Location::RegisterLocation(EAX));
4426 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4427 locations->SetOut(Location::RegisterLocation(EDX));
4428 // We need to save the numerator while we tweak EAX and EDX. Since the one-operand imul
4429 // forces its result into EDX:EAX, things are simpler if we also use EDX as the
4430 // output and request another temp.
4431 if (rem->InputAt(1)->IsIntConstant()) {
4432 locations->AddTemp(Location::RequiresRegister());
4433 }
4434 break;
4435 }
4436 case DataType::Type::kInt64: {
4437 InvokeRuntimeCallingConvention calling_convention;
4438 locations->SetInAt(0, Location::RegisterPairLocation(
4439 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4440 locations->SetInAt(1, Location::RegisterPairLocation(
4441 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4442 // Runtime helper puts the result in EAX, EDX.
4443 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4444 break;
4445 }
4446 case DataType::Type::kFloat64:
4447 case DataType::Type::kFloat32: {
4448 locations->SetInAt(0, Location::Any());
4449 locations->SetInAt(1, Location::Any());
4450 locations->SetOut(Location::RequiresFpuRegister());
4451 locations->AddTemp(Location::RegisterLocation(EAX));
4452 break;
4453 }
4454
4455 default:
4456 LOG(FATAL) << "Unexpected rem type " << type;
4457 }
4458 }
4459
VisitRem(HRem * rem)4460 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4461 DataType::Type type = rem->GetResultType();
4462 switch (type) {
4463 case DataType::Type::kInt32:
4464 case DataType::Type::kInt64: {
4465 GenerateDivRemIntegral(rem);
4466 break;
4467 }
4468 case DataType::Type::kFloat32:
4469 case DataType::Type::kFloat64: {
4470 GenerateRemFP(rem);
4471 break;
4472 }
4473 default:
4474 LOG(FATAL) << "Unexpected rem type " << type;
4475 }
4476 }
4477
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4478 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4479 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4480 switch (minmax->GetResultType()) {
4481 case DataType::Type::kInt32:
4482 locations->SetInAt(0, Location::RequiresRegister());
4483 locations->SetInAt(1, Location::RequiresRegister());
4484 locations->SetOut(Location::SameAsFirstInput());
4485 break;
4486 case DataType::Type::kInt64:
4487 locations->SetInAt(0, Location::RequiresRegister());
4488 locations->SetInAt(1, Location::RequiresRegister());
4489 locations->SetOut(Location::SameAsFirstInput());
4490 // Register to use to perform a long subtract to set cc.
4491 locations->AddTemp(Location::RequiresRegister());
4492 break;
4493 case DataType::Type::kFloat32:
4494 locations->SetInAt(0, Location::RequiresFpuRegister());
4495 locations->SetInAt(1, Location::RequiresFpuRegister());
4496 locations->SetOut(Location::SameAsFirstInput());
4497 locations->AddTemp(Location::RequiresRegister());
4498 break;
4499 case DataType::Type::kFloat64:
4500 locations->SetInAt(0, Location::RequiresFpuRegister());
4501 locations->SetInAt(1, Location::RequiresFpuRegister());
4502 locations->SetOut(Location::SameAsFirstInput());
4503 break;
4504 default:
4505 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4506 }
4507 }
4508
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4509 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4510 bool is_min,
4511 DataType::Type type) {
4512 Location op1_loc = locations->InAt(0);
4513 Location op2_loc = locations->InAt(1);
4514
4515 // Shortcut for same input locations.
4516 if (op1_loc.Equals(op2_loc)) {
4517 // Can return immediately, as op1_loc == out_loc.
4518 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4519 // a copy here.
4520 DCHECK(locations->Out().Equals(op1_loc));
4521 return;
4522 }
4523
4524 if (type == DataType::Type::kInt64) {
4525 // Need to perform a subtract to get the sign right.
4526 // op1 is already in the same location as the output.
4527 Location output = locations->Out();
4528 Register output_lo = output.AsRegisterPairLow<Register>();
4529 Register output_hi = output.AsRegisterPairHigh<Register>();
4530
4531 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4532 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4533
4534 // The comparison is performed by subtracting the second operand from
4535 // the first operand and then setting the status flags in the same
4536 // manner as the SUB instruction.
4537 __ cmpl(output_lo, op2_lo);
4538
4539 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4540 Register temp = locations->GetTemp(0).AsRegister<Register>();
4541 __ movl(temp, output_hi);
4542 __ sbbl(temp, op2_hi);
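  // For illustration: comparing output == 2^32 (lo 0, hi 1) with op2 == 1 (lo 1, hi 0),
  // the low cmp borrows (CF == 1) and the sbb computes 1 - 0 - 1 == 0, leaving the
  // flags as if a full 64-bit subtraction had been performed, so the cmovs below
  // correctly treat output as the larger value.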
4543
4544 // Now the condition code is correct.
4545 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4546 __ cmovl(cond, output_lo, op2_lo);
4547 __ cmovl(cond, output_hi, op2_hi);
4548 } else {
4549 DCHECK_EQ(type, DataType::Type::kInt32);
4550 Register out = locations->Out().AsRegister<Register>();
4551 Register op2 = op2_loc.AsRegister<Register>();
4552
4553 // (out := op1)
4554 // out <=? op2
4555 // if out is already the min (resp. max), keep it
4556 // else out := op2
4557 // (implemented with cmov below, so no branch is needed)
4558
4559 __ cmpl(out, op2);
4560 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4561 __ cmovl(cond, out, op2);
4562 }
4563 }
4564
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4565 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4566 bool is_min,
4567 DataType::Type type) {
4568 Location op1_loc = locations->InAt(0);
4569 Location op2_loc = locations->InAt(1);
4570 Location out_loc = locations->Out();
4571 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4572
4573 // Shortcut for same input locations.
4574 if (op1_loc.Equals(op2_loc)) {
4575 DCHECK(out_loc.Equals(op1_loc));
4576 return;
4577 }
4578
4579 // (out := op1)
4580 // out <=? op2
4581 // if Nan jmp Nan_label
4582 // if out is min jmp done
4583 // if op2 is min jmp op2_label
4584 // handle -0/+0
4585 // jmp done
4586 // Nan_label:
4587 // out := NaN
4588 // op2_label:
4589 // out := op2
4590 // done:
4591 //
4592 // This removes one jmp, but needs to copy one input (op1) to out.
4593 //
4594 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4595
4596 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4597
4598 NearLabel nan, done, op2_label;
4599 if (type == DataType::Type::kFloat64) {
4600 __ ucomisd(out, op2);
4601 } else {
4602 DCHECK_EQ(type, DataType::Type::kFloat32);
4603 __ ucomiss(out, op2);
4604 }
4605
4606 __ j(Condition::kParityEven, &nan);
4607
4608 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4609 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4610
4611 // Handle 0.0/-0.0.
4612 if (is_min) {
4613 if (type == DataType::Type::kFloat64) {
4614 __ orpd(out, op2);
4615 } else {
4616 __ orps(out, op2);
4617 }
4618 } else {
4619 if (type == DataType::Type::kFloat64) {
4620 __ andpd(out, op2);
4621 } else {
4622 __ andps(out, op2);
4623 }
4624 }
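  // We only reach this point when ucomiss/ucomisd reported equality. For equal
  // non-zero values the bitwise OR/AND is a no-op; for +0.0 vs -0.0 it picks the
  // correct result, since OR of the bit patterns yields -0.0 (the min) and AND
  // yields +0.0 (the max).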
4625 __ jmp(&done);
4626
4627 // NaN handling.
4628 __ Bind(&nan);
4629 if (type == DataType::Type::kFloat64) {
4630 // TODO: Use a constant from the constant table (requires extra input).
4631 __ LoadLongConstant(out, kDoubleNaN);
4632 } else {
4633 Register constant = locations->GetTemp(0).AsRegister<Register>();
4634 __ movl(constant, Immediate(kFloatNaN));
4635 __ movd(out, constant);
4636 }
4637 __ jmp(&done);
4638
4639 // out := op2;
4640 __ Bind(&op2_label);
4641 if (type == DataType::Type::kFloat64) {
4642 __ movsd(out, op2);
4643 } else {
4644 __ movss(out, op2);
4645 }
4646
4647 // Done.
4648 __ Bind(&done);
4649 }
4650
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4651 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4652 DataType::Type type = minmax->GetResultType();
4653 switch (type) {
4654 case DataType::Type::kInt32:
4655 case DataType::Type::kInt64:
4656 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4657 break;
4658 case DataType::Type::kFloat32:
4659 case DataType::Type::kFloat64:
4660 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4661 break;
4662 default:
4663 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4664 }
4665 }
4666
VisitMin(HMin * min)4667 void LocationsBuilderX86::VisitMin(HMin* min) {
4668 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4669 }
4670
VisitMin(HMin * min)4671 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4672 GenerateMinMax(min, /*is_min*/ true);
4673 }
4674
VisitMax(HMax * max)4675 void LocationsBuilderX86::VisitMax(HMax* max) {
4676 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4677 }
4678
VisitMax(HMax * max)4679 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4680 GenerateMinMax(max, /*is_min*/ false);
4681 }
4682
VisitAbs(HAbs * abs)4683 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4684 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4685 switch (abs->GetResultType()) {
4686 case DataType::Type::kInt32:
4687 locations->SetInAt(0, Location::RegisterLocation(EAX));
4688 locations->SetOut(Location::SameAsFirstInput());
4689 locations->AddTemp(Location::RegisterLocation(EDX));
4690 break;
4691 case DataType::Type::kInt64:
4692 locations->SetInAt(0, Location::RequiresRegister());
4693 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4694 locations->AddTemp(Location::RequiresRegister());
4695 break;
4696 case DataType::Type::kFloat32:
4697 locations->SetInAt(0, Location::RequiresFpuRegister());
4698 locations->SetOut(Location::SameAsFirstInput());
4699 locations->AddTemp(Location::RequiresFpuRegister());
4700 locations->AddTemp(Location::RequiresRegister());
4701 break;
4702 case DataType::Type::kFloat64:
4703 locations->SetInAt(0, Location::RequiresFpuRegister());
4704 locations->SetOut(Location::SameAsFirstInput());
4705 locations->AddTemp(Location::RequiresFpuRegister());
4706 break;
4707 default:
4708 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4709 }
4710 }
4711
VisitAbs(HAbs * abs)4712 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4713 LocationSummary* locations = abs->GetLocations();
4714 switch (abs->GetResultType()) {
4715 case DataType::Type::kInt32: {
4716 Register out = locations->Out().AsRegister<Register>();
4717 DCHECK_EQ(out, EAX);
4718 Register temp = locations->GetTemp(0).AsRegister<Register>();
4719 DCHECK_EQ(temp, EDX);
4720 // Sign extend EAX into EDX.
4721 __ cdq();
4722 // XOR EAX with sign.
4723 __ xorl(EAX, EDX);
4724 // Subtract out sign to correct.
4725 __ subl(EAX, EDX);
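  // For illustration, EAX == -5: cdq sets EDX to -1, the xor gives 4, and
  // subtracting -1 yields 5. For non-negative inputs EDX is 0 and both steps
  // are no-ops.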
4726 // The result is in EAX.
4727 break;
4728 }
4729 case DataType::Type::kInt64: {
4730 Location input = locations->InAt(0);
4731 Register input_lo = input.AsRegisterPairLow<Register>();
4732 Register input_hi = input.AsRegisterPairHigh<Register>();
4733 Location output = locations->Out();
4734 Register output_lo = output.AsRegisterPairLow<Register>();
4735 Register output_hi = output.AsRegisterPairHigh<Register>();
4736 Register temp = locations->GetTemp(0).AsRegister<Register>();
4737 // Compute the sign into the temporary.
4738 __ movl(temp, input_hi);
4739 __ sarl(temp, Immediate(31));
4740 // Store the sign into the output.
4741 __ movl(output_lo, temp);
4742 __ movl(output_hi, temp);
4743 // XOR the input to the output.
4744 __ xorl(output_lo, input_lo);
4745 __ xorl(output_hi, input_hi);
4746 // Subtract the sign.
4747 __ subl(output_lo, temp);
4748 __ sbbl(output_hi, temp);
4749 break;
4750 }
4751 case DataType::Type::kFloat32: {
4752 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4753 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4754 Register constant = locations->GetTemp(1).AsRegister<Register>();
4755 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4756 __ movd(temp, constant);
4757 __ andps(out, temp);
4758 break;
4759 }
4760 case DataType::Type::kFloat64: {
4761 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4762 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4763 // TODO: Use a constant from the constant table (requires extra input).
4764 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4765 __ andpd(out, temp);
4766 break;
4767 }
4768 default:
4769 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4770 }
4771 }
4772
VisitDivZeroCheck(HDivZeroCheck * instruction)4773 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4774 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4775 switch (instruction->GetType()) {
4776 case DataType::Type::kBool:
4777 case DataType::Type::kUint8:
4778 case DataType::Type::kInt8:
4779 case DataType::Type::kUint16:
4780 case DataType::Type::kInt16:
4781 case DataType::Type::kInt32: {
4782 locations->SetInAt(0, Location::Any());
4783 break;
4784 }
4785 case DataType::Type::kInt64: {
4786 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4787 if (!instruction->IsConstant()) {
4788 locations->AddTemp(Location::RequiresRegister());
4789 }
4790 break;
4791 }
4792 default:
4793 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4794 }
4795 }
4796
VisitDivZeroCheck(HDivZeroCheck * instruction)4797 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4798 SlowPathCode* slow_path =
4799 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4800 codegen_->AddSlowPath(slow_path);
4801
4802 LocationSummary* locations = instruction->GetLocations();
4803 Location value = locations->InAt(0);
4804
4805 switch (instruction->GetType()) {
4806 case DataType::Type::kBool:
4807 case DataType::Type::kUint8:
4808 case DataType::Type::kInt8:
4809 case DataType::Type::kUint16:
4810 case DataType::Type::kInt16:
4811 case DataType::Type::kInt32: {
4812 if (value.IsRegister()) {
4813 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4814 __ j(kEqual, slow_path->GetEntryLabel());
4815 } else if (value.IsStackSlot()) {
4816 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4817 __ j(kEqual, slow_path->GetEntryLabel());
4818 } else {
4819 DCHECK(value.IsConstant()) << value;
4820 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4821 __ jmp(slow_path->GetEntryLabel());
4822 }
4823 }
4824 break;
4825 }
4826 case DataType::Type::kInt64: {
4827 if (value.IsRegisterPair()) {
4828 Register temp = locations->GetTemp(0).AsRegister<Register>();
4829 __ movl(temp, value.AsRegisterPairLow<Register>());
4830 __ orl(temp, value.AsRegisterPairHigh<Register>());
4831 __ j(kEqual, slow_path->GetEntryLabel());
4832 } else {
4833 DCHECK(value.IsConstant()) << value;
4834 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4835 __ jmp(slow_path->GetEntryLabel());
4836 }
4837 }
4838 break;
4839 }
4840 default:
4841 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4842 }
4843 }
4844
HandleShift(HBinaryOperation * op)4845 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4846 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4847
4848 LocationSummary* locations =
4849 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4850
4851 switch (op->GetResultType()) {
4852 case DataType::Type::kInt32:
4853 case DataType::Type::kInt64: {
4854 // Can't have Location::Any() and output SameAsFirstInput()
4855 locations->SetInAt(0, Location::RequiresRegister());
4856 // The shift count needs to be in CL or a constant.
4857 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4858 locations->SetOut(Location::SameAsFirstInput());
4859 break;
4860 }
4861 default:
4862 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4863 }
4864 }
4865
HandleShift(HBinaryOperation * op)4866 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4867 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4868
4869 LocationSummary* locations = op->GetLocations();
4870 Location first = locations->InAt(0);
4871 Location second = locations->InAt(1);
4872 DCHECK(first.Equals(locations->Out()));
4873
4874 switch (op->GetResultType()) {
4875 case DataType::Type::kInt32: {
4876 DCHECK(first.IsRegister());
4877 Register first_reg = first.AsRegister<Register>();
4878 if (second.IsRegister()) {
4879 Register second_reg = second.AsRegister<Register>();
4880 DCHECK_EQ(ECX, second_reg);
4881 if (op->IsShl()) {
4882 __ shll(first_reg, second_reg);
4883 } else if (op->IsShr()) {
4884 __ sarl(first_reg, second_reg);
4885 } else {
4886 __ shrl(first_reg, second_reg);
4887 }
4888 } else {
4889 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4890 if (shift == 0) {
4891 return;
4892 }
4893 Immediate imm(shift);
4894 if (op->IsShl()) {
4895 __ shll(first_reg, imm);
4896 } else if (op->IsShr()) {
4897 __ sarl(first_reg, imm);
4898 } else {
4899 __ shrl(first_reg, imm);
4900 }
4901 }
4902 break;
4903 }
4904 case DataType::Type::kInt64: {
4905 if (second.IsRegister()) {
4906 Register second_reg = second.AsRegister<Register>();
4907 DCHECK_EQ(ECX, second_reg);
4908 if (op->IsShl()) {
4909 GenerateShlLong(first, second_reg);
4910 } else if (op->IsShr()) {
4911 GenerateShrLong(first, second_reg);
4912 } else {
4913 GenerateUShrLong(first, second_reg);
4914 }
4915 } else {
4916 // Shift by a constant.
4917 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4918 // Nothing to do if the shift is 0, as the input is already the output.
4919 if (shift != 0) {
4920 if (op->IsShl()) {
4921 GenerateShlLong(first, shift);
4922 } else if (op->IsShr()) {
4923 GenerateShrLong(first, shift);
4924 } else {
4925 GenerateUShrLong(first, shift);
4926 }
4927 }
4928 }
4929 break;
4930 }
4931 default:
4932 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4933 }
4934 }
4935
GenerateShlLong(const Location & loc,int shift)4936 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4937 Register low = loc.AsRegisterPairLow<Register>();
4938 Register high = loc.AsRegisterPairHigh<Register>();
4939 if (shift == 1) {
4940 // This is just an addition.
4941 __ addl(low, low);
4942 __ adcl(high, high);
4943 } else if (shift == 32) {
4944 // Shift by 32 is easy. High gets low, and low gets 0.
4945 codegen_->EmitParallelMoves(
4946 loc.ToLow(),
4947 loc.ToHigh(),
4948 DataType::Type::kInt32,
4949 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4950 loc.ToLow(),
4951 DataType::Type::kInt32);
4952 } else if (shift > 32) {
4953 // Low part becomes 0. High part is low part << (shift-32).
4954 __ movl(high, low);
4955 __ shll(high, Immediate(shift - 32));
4956 __ xorl(low, low);
4957 } else {
4958 // Between 1 and 31.
4959 __ shld(high, low, Immediate(shift));
4960 __ shll(low, Immediate(shift));
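  // shld (above) shifts `high` left while filling its vacated low bits from the top
  // bits of the original `low`; combined with the shll of `low`, the pair is shifted
  // left as a single 64-bit value. E.g. for shift == 4 the top 4 bits of `low` end up
  // as the bottom 4 bits of `high`.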
4961 }
4962 }
4963
GenerateShlLong(const Location & loc,Register shifter)4964 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4965 NearLabel done;
4966 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4967 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4968 __ testl(shifter, Immediate(32));
4969 __ j(kEqual, &done);
4970 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4971 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4972 __ Bind(&done);
4973 }
4974
GenerateShrLong(const Location & loc,int shift)4975 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4976 Register low = loc.AsRegisterPairLow<Register>();
4977 Register high = loc.AsRegisterPairHigh<Register>();
4978 if (shift == 32) {
4979 // Need to copy the sign.
4980 DCHECK_NE(low, high);
4981 __ movl(low, high);
4982 __ sarl(high, Immediate(31));
4983 } else if (shift > 32) {
4984 DCHECK_NE(low, high);
4985 // High part becomes sign. Low part is shifted by shift - 32.
4986 __ movl(low, high);
4987 __ sarl(high, Immediate(31));
4988 __ sarl(low, Immediate(shift - 32));
4989 } else {
4990 // Between 1 and 31.
4991 __ shrd(low, high, Immediate(shift));
4992 __ sarl(high, Immediate(shift));
4993 }
4994 }
4995
GenerateShrLong(const Location & loc,Register shifter)4996 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4997 NearLabel done;
4998 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4999 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
5000 __ testl(shifter, Immediate(32));
5001 __ j(kEqual, &done);
5002 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5003 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
5004 __ Bind(&done);
5005 }
5006
GenerateUShrLong(const Location & loc,int shift)5007 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
5008 Register low = loc.AsRegisterPairLow<Register>();
5009 Register high = loc.AsRegisterPairHigh<Register>();
5010 if (shift == 32) {
5011 // Shift by 32 is easy. Low gets high, and high gets 0.
5012 codegen_->EmitParallelMoves(
5013 loc.ToHigh(),
5014 loc.ToLow(),
5015 DataType::Type::kInt32,
5016 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
5017 loc.ToHigh(),
5018 DataType::Type::kInt32);
5019 } else if (shift > 32) {
5020 // Low part is high >> (shift - 32). High part becomes 0.
5021 __ movl(low, high);
5022 __ shrl(low, Immediate(shift - 32));
5023 __ xorl(high, high);
5024 } else {
5025 // Between 1 and 31.
5026 __ shrd(low, high, Immediate(shift));
5027 __ shrl(high, Immediate(shift));
5028 }
5029 }
5030
GenerateUShrLong(const Location & loc,Register shifter)5031 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
5032 NearLabel done;
5033 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
5034 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
5035 __ testl(shifter, Immediate(32));
5036 __ j(kEqual, &done);
5037 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
5038 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
5039 __ Bind(&done);
5040 }
5041
VisitRol(HRol * rol)5042 void LocationsBuilderX86::VisitRol(HRol* rol) {
5043 HandleRotate(rol);
5044 }
5045
VisitRor(HRor * ror)5046 void LocationsBuilderX86::VisitRor(HRor* ror) {
5047 HandleRotate(ror);
5048 }
5049
HandleRotate(HBinaryOperation * rotate)5050 void LocationsBuilderX86::HandleRotate(HBinaryOperation* rotate) {
5051 LocationSummary* locations =
5052 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5053
5054 switch (rotate->GetResultType()) {
5055 case DataType::Type::kInt64:
5056 // Add the temporary needed.
5057 locations->AddTemp(Location::RequiresRegister());
5058 FALLTHROUGH_INTENDED;
5059 case DataType::Type::kInt32:
5060 locations->SetInAt(0, Location::RequiresRegister());
5061 // The shift count needs to be in CL (unless it is a constant).
5062 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, rotate->InputAt(1)));
5063 locations->SetOut(Location::SameAsFirstInput());
5064 break;
5065 default:
5066 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5067 UNREACHABLE();
5068 }
5069 }
5070
VisitRol(HRol * rol)5071 void InstructionCodeGeneratorX86::VisitRol(HRol* rol) {
5072 HandleRotate(rol);
5073 }
5074
VisitRor(HRor * ror)5075 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
5076 HandleRotate(ror);
5077 }
5078
HandleRotate(HBinaryOperation * rotate)5079 void InstructionCodeGeneratorX86::HandleRotate(HBinaryOperation* rotate) {
5080 LocationSummary* locations = rotate->GetLocations();
5081 Location first = locations->InAt(0);
5082 Location second = locations->InAt(1);
5083
5084 if (rotate->GetResultType() == DataType::Type::kInt32) {
5085 Register first_reg = first.AsRegister<Register>();
5086 if (second.IsRegister()) {
5087 Register second_reg = second.AsRegister<Register>();
5088 if (rotate->IsRol()) {
5089 __ roll(first_reg, second_reg);
5090 } else {
5091 DCHECK(rotate->IsRor());
5092 __ rorl(first_reg, second_reg);
5093 }
5094 } else {
5095 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5096 if (rotate->IsRol()) {
5097 __ roll(first_reg, imm);
5098 } else {
5099 DCHECK(rotate->IsRor());
5100 __ rorl(first_reg, imm);
5101 }
5102 }
5103 return;
5104 }
5105
5106 DCHECK_EQ(rotate->GetResultType(), DataType::Type::kInt64);
5107 Register first_reg_lo = first.AsRegisterPairLow<Register>();
5108 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
5109 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
5110 if (second.IsRegister()) {
5111 Register second_reg = second.AsRegister<Register>();
5112 DCHECK_EQ(second_reg, ECX);
5113
5114 __ movl(temp_reg, first_reg_hi);
5115 if (rotate->IsRol()) {
5116 __ shld(first_reg_hi, first_reg_lo, second_reg);
5117 __ shld(first_reg_lo, temp_reg, second_reg);
5118 } else {
5119 __ shrd(first_reg_hi, first_reg_lo, second_reg);
5120 __ shrd(first_reg_lo, temp_reg, second_reg);
5121 }
5122 __ movl(temp_reg, first_reg_hi);
5123 __ testl(second_reg, Immediate(32));
5124 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
5125 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
5126 } else {
5127 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
5128 if (rotate->IsRol()) {
5129 value = -value;
5130 }
5131 int32_t shift_amt = value & kMaxLongShiftDistance;
5132
5133 if (shift_amt == 0) {
5134 // Already fine.
5135 return;
5136 }
5137 if (shift_amt == 32) {
5138 // Just swap.
5139 __ movl(temp_reg, first_reg_lo);
5140 __ movl(first_reg_lo, first_reg_hi);
5141 __ movl(first_reg_hi, temp_reg);
5142 return;
5143 }
5144
5145 Immediate imm(shift_amt);
5146 // Save the contents of the low register.
5147 __ movl(temp_reg, first_reg_lo);
5148
5149 // Shift right into low, feeding bits from high.
5150 __ shrd(first_reg_lo, first_reg_hi, imm);
5151
5152 // Shift right into high, feeding bits from the original low.
5153 __ shrd(first_reg_hi, temp_reg, imm);
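  // At this point the register pair has been rotated right by (shift_amt mod 32):
  // each shrd shifts its destination right while refilling the vacated bits from the
  // other (original) half. The hardware masks the immediate count to 5 bits, so for
  // shift_amt > 32 the swap below contributes the remaining 32 bits of rotation.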
5154
5155 // Swap if needed.
5156 if (shift_amt > 32) {
5157 __ movl(temp_reg, first_reg_lo);
5158 __ movl(first_reg_lo, first_reg_hi);
5159 __ movl(first_reg_hi, temp_reg);
5160 }
5161 }
5162 }
5163
VisitShl(HShl * shl)5164 void LocationsBuilderX86::VisitShl(HShl* shl) {
5165 HandleShift(shl);
5166 }
5167
VisitShl(HShl * shl)5168 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5169 HandleShift(shl);
5170 }
5171
VisitShr(HShr * shr)5172 void LocationsBuilderX86::VisitShr(HShr* shr) {
5173 HandleShift(shr);
5174 }
5175
VisitShr(HShr * shr)5176 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5177 HandleShift(shr);
5178 }
5179
VisitUShr(HUShr * ushr)5180 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5181 HandleShift(ushr);
5182 }
5183
VisitUShr(HUShr * ushr)5184 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5185 HandleShift(ushr);
5186 }
5187
VisitNewInstance(HNewInstance * instruction)5188 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5189 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5190 instruction, LocationSummary::kCallOnMainOnly);
5191 locations->SetOut(Location::RegisterLocation(EAX));
5192 InvokeRuntimeCallingConvention calling_convention;
5193 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5194 }
5195
VisitNewInstance(HNewInstance * instruction)5196 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5197 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction);
5198 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5199 DCHECK(!codegen_->IsLeafMethod());
5200 }
5201
VisitNewArray(HNewArray * instruction)5202 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5203 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5204 instruction, LocationSummary::kCallOnMainOnly);
5205 locations->SetOut(Location::RegisterLocation(EAX));
5206 InvokeRuntimeCallingConvention calling_convention;
5207 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5208 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5209 }
5210
VisitNewArray(HNewArray * instruction)5211 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5212 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5213 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5214 codegen_->InvokeRuntime(entrypoint, instruction);
5215 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5216 DCHECK(!codegen_->IsLeafMethod());
5217 }
5218
VisitParameterValue(HParameterValue * instruction)5219 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5220 LocationSummary* locations =
5221 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5222 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5223 if (location.IsStackSlot()) {
5224 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5225 } else if (location.IsDoubleStackSlot()) {
5226 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5227 }
5228 locations->SetOut(location);
5229 }
5230
VisitParameterValue(HParameterValue * instruction)5231 void InstructionCodeGeneratorX86::VisitParameterValue(
5232 [[maybe_unused]] HParameterValue* instruction) {}
5233
VisitCurrentMethod(HCurrentMethod * instruction)5234 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5235 LocationSummary* locations =
5236 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5237 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5238 }
5239
VisitCurrentMethod(HCurrentMethod * instruction)5240 void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
5241 }
5242
VisitClassTableGet(HClassTableGet * instruction)5243 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5244 LocationSummary* locations =
5245 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5246 locations->SetInAt(0, Location::RequiresRegister());
5247 locations->SetOut(Location::RequiresRegister());
5248 }
5249
VisitClassTableGet(HClassTableGet * instruction)5250 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5251 LocationSummary* locations = instruction->GetLocations();
5252 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5253 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5254 instruction->GetIndex(), kX86PointerSize).SizeValue();
5255 __ movl(locations->Out().AsRegister<Register>(),
5256 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5257 } else {
5258 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5259 instruction->GetIndex(), kX86PointerSize));
5260 __ movl(locations->Out().AsRegister<Register>(),
5261 Address(locations->InAt(0).AsRegister<Register>(),
5262 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5263 // temp = temp->GetImtEntryAt(method_offset);
5264 __ movl(locations->Out().AsRegister<Register>(),
5265 Address(locations->Out().AsRegister<Register>(), method_offset));
5266 }
5267 }
5268
VisitNot(HNot * not_)5269 void LocationsBuilderX86::VisitNot(HNot* not_) {
5270 LocationSummary* locations =
5271 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5272 locations->SetInAt(0, Location::RequiresRegister());
5273 locations->SetOut(Location::SameAsFirstInput());
5274 }
5275
VisitNot(HNot * not_)5276 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5277 LocationSummary* locations = not_->GetLocations();
5278 Location in = locations->InAt(0);
5279 Location out = locations->Out();
5280 DCHECK(in.Equals(out));
5281 switch (not_->GetResultType()) {
5282 case DataType::Type::kInt32:
5283 __ notl(out.AsRegister<Register>());
5284 break;
5285
5286 case DataType::Type::kInt64:
5287 __ notl(out.AsRegisterPairLow<Register>());
5288 __ notl(out.AsRegisterPairHigh<Register>());
5289 break;
5290
5291 default:
5292 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5293 }
5294 }
5295
VisitBooleanNot(HBooleanNot * bool_not)5296 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5297 LocationSummary* locations =
5298 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5299 locations->SetInAt(0, Location::RequiresRegister());
5300 locations->SetOut(Location::SameAsFirstInput());
5301 }
5302
VisitBooleanNot(HBooleanNot * bool_not)5303 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5304 LocationSummary* locations = bool_not->GetLocations();
5305 Location in = locations->InAt(0);
5306 Location out = locations->Out();
5307 DCHECK(in.Equals(out));
5308 __ xorl(out.AsRegister<Register>(), Immediate(1));
5309 }
5310
VisitCompare(HCompare * compare)5311 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5312 LocationSummary* locations =
5313 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5314 switch (compare->GetComparisonType()) {
5315 case DataType::Type::kBool:
5316 case DataType::Type::kUint8:
5317 case DataType::Type::kInt8:
5318 case DataType::Type::kUint16:
5319 case DataType::Type::kInt16:
5320 case DataType::Type::kInt32:
5321 case DataType::Type::kUint32:
5322 case DataType::Type::kInt64:
5323 case DataType::Type::kUint64: {
5324 locations->SetInAt(0, Location::RequiresRegister());
5325 locations->SetInAt(1, Location::Any());
5326 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5327 break;
5328 }
5329 case DataType::Type::kFloat32:
5330 case DataType::Type::kFloat64: {
5331 locations->SetInAt(0, Location::RequiresFpuRegister());
5332 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5333 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5334 } else if (compare->InputAt(1)->IsConstant()) {
5335 locations->SetInAt(1, Location::RequiresFpuRegister());
5336 } else {
5337 locations->SetInAt(1, Location::Any());
5338 }
5339 locations->SetOut(Location::RequiresRegister());
5340 break;
5341 }
5342 default:
5343 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5344 }
5345 }
5346
VisitCompare(HCompare * compare)5347 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5348 LocationSummary* locations = compare->GetLocations();
5349 Register out = locations->Out().AsRegister<Register>();
5350 Location left = locations->InAt(0);
5351 Location right = locations->InAt(1);
5352
5353 NearLabel less, greater, done;
5354 Condition less_cond = kLess;
5355 Condition greater_cond = kGreater;
5356
5357 switch (compare->GetComparisonType()) {
5358 case DataType::Type::kUint32:
5359 less_cond = kBelow;
5360 // greater_cond is not needed below.
5361 FALLTHROUGH_INTENDED;
5362 case DataType::Type::kBool:
5363 case DataType::Type::kUint8:
5364 case DataType::Type::kInt8:
5365 case DataType::Type::kUint16:
5366 case DataType::Type::kInt16:
5367 case DataType::Type::kInt32: {
5368 codegen_->GenerateIntCompare(left, right);
5369 break;
5370 }
5371 case DataType::Type::kUint64:
5372 less_cond = kBelow;
5373 greater_cond = kAbove;
5374 FALLTHROUGH_INTENDED;
5375 case DataType::Type::kInt64: {
5376 Register left_low = left.AsRegisterPairLow<Register>();
5377 Register left_high = left.AsRegisterPairHigh<Register>();
5378 int32_t val_low = 0;
5379 int32_t val_high = 0;
5380 bool right_is_const = false;
5381
5382 if (right.IsConstant()) {
5383 DCHECK(right.GetConstant()->IsLongConstant());
5384 right_is_const = true;
5385 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5386 val_low = Low32Bits(val);
5387 val_high = High32Bits(val);
5388 }
5389
5390 if (right.IsRegisterPair()) {
5391 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5392 } else if (right.IsDoubleStackSlot()) {
5393 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5394 } else {
5395 DCHECK(right_is_const) << right;
5396 codegen_->Compare32BitValue(left_high, val_high);
5397 }
5398 __ j(less_cond, &less); // High part compare.
5399 __ j(greater_cond, &greater); // High part compare.
5400 if (right.IsRegisterPair()) {
5401 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5402 } else if (right.IsDoubleStackSlot()) {
5403 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5404 } else {
5405 DCHECK(right_is_const) << right;
5406 codegen_->Compare32BitValue(left_low, val_low);
5407 }
5408 less_cond = kBelow; // for CF (unsigned).
5409 // greater_cond is not needed below.
5410 break;
5411 }
5412 case DataType::Type::kFloat32: {
5413 GenerateFPCompare(left, right, compare, false);
5414 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5415 less_cond = kBelow; // for CF (floats).
5416 break;
5417 }
5418 case DataType::Type::kFloat64: {
5419 GenerateFPCompare(left, right, compare, true);
5420 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5421 less_cond = kBelow; // for CF (floats).
5422 break;
5423 }
5424 default:
5425 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5426 }
5427
5428 __ movl(out, Immediate(0));
5429 __ j(kEqual, &done);
5430 __ j(less_cond, &less);
5431
5432 __ Bind(&greater);
5433 __ movl(out, Immediate(1));
5434 __ jmp(&done);
5435
5436 __ Bind(&less);
5437 __ movl(out, Immediate(-1));
5438
5439 __ Bind(&done);
5440 }
5441
VisitPhi(HPhi * instruction)5442 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5443 LocationSummary* locations =
5444 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5445 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5446 locations->SetInAt(i, Location::Any());
5447 }
5448 locations->SetOut(Location::Any());
5449 }
5450
VisitPhi(HPhi * instruction)5451 void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
5452 LOG(FATAL) << "Unreachable";
5453 }
5454
GenerateMemoryBarrier(MemBarrierKind kind)5455 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5456 /*
5457 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
5458 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5459 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5460 */
5461 switch (kind) {
5462 case MemBarrierKind::kAnyAny: {
5463 MemoryFence();
5464 break;
5465 }
5466 case MemBarrierKind::kAnyStore:
5467 case MemBarrierKind::kLoadAny:
5468 case MemBarrierKind::kStoreStore: {
5469 // nop
5470 break;
5471 }
5472 case MemBarrierKind::kNTStoreStore:
5473 // Non-Temporal Store/Store needs an explicit fence.
5474 MemoryFence(/* non-temporal= */ true);
5475 break;
5476 }
5477 }
5478
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,ArtMethod * method)5479 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5480 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5481 [[maybe_unused]] ArtMethod* method) {
5482 return desired_dispatch_info;
5483 }
5484
GetInvokeExtraParameter(HInvoke * invoke,Register temp)5485 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5486 if (invoke->IsInvokeStaticOrDirect()) {
5487 return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5488 }
5489 DCHECK(invoke->IsInvokeInterface());
5490 Location location =
5491 invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5492 return location.AsRegister<Register>();
5493 }
5494
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)5495 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5496 Register temp) {
5497 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5498 if (!invoke->GetLocations()->Intrinsified()) {
5499 return location.AsRegister<Register>();
5500 }
5501 // For intrinsics we allow any location, so it may be on the stack.
5502 if (!location.IsRegister()) {
5503 __ movl(temp, Address(ESP, location.GetStackIndex()));
5504 return temp;
5505 }
5506 // For register locations, check if the register was saved. If so, get it from the stack.
5507 // Note: There is a chance that the register was saved but not overwritten, so we could
5508 // save one load. However, since this is just an intrinsic slow path we prefer this
5509 // simple and more robust approach rather than trying to determine if that's the case.
5510 SlowPathCode* slow_path = GetCurrentSlowPath();
5511 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5512 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5513 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5514 __ movl(temp, Address(ESP, stack_offset));
5515 return temp;
5516 }
5517 return location.AsRegister<Register>();
5518 }
5519
LoadMethod(MethodLoadKind load_kind,Location temp,HInvoke * invoke)5520 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5521 switch (load_kind) {
5522 case MethodLoadKind::kBootImageLinkTimePcRelative: {
5523 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5524 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5525 __ leal(temp.AsRegister<Register>(),
5526 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5527 RecordBootImageMethodPatch(invoke);
5528 break;
5529 }
5530 case MethodLoadKind::kBootImageRelRo: {
5531 size_t index = invoke->IsInvokeInterface()
5532 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5533 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5534 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5535 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5536 RecordBootImageRelRoPatch(
5537 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5538 GetBootImageOffset(invoke));
5539 break;
5540 }
5541 case MethodLoadKind::kAppImageRelRo: {
5542 DCHECK(GetCompilerOptions().IsAppImage());
5543 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5544 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5545 RecordAppImageMethodPatch(invoke);
5546 break;
5547 }
5548 case MethodLoadKind::kBssEntry: {
5549 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5550 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5551 RecordMethodBssEntryPatch(invoke);
5552 // No need for memory fence, thanks to the x86 memory model.
5553 break;
5554 }
5555 case MethodLoadKind::kJitDirectAddress: {
5556 __ movl(temp.AsRegister<Register>(),
5557 Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5558 break;
5559 }
5560 case MethodLoadKind::kRuntimeCall: {
5561 // Test situation, don't do anything.
5562 break;
5563 }
5564 default: {
5565 LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5566 UNREACHABLE();
5567 }
5568 }
5569 }
5570
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5571 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5572 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5573 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5574 switch (invoke->GetMethodLoadKind()) {
5575 case MethodLoadKind::kStringInit: {
5576 // temp = thread->string_init_entrypoint
5577 uint32_t offset =
5578 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5579 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5580 break;
5581 }
5582 case MethodLoadKind::kRecursive: {
5583 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5584 break;
5585 }
5586 case MethodLoadKind::kRuntimeCall: {
5587 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5588 return; // No code pointer retrieval; the runtime performs the call directly.
5589 }
5590 case MethodLoadKind::kBootImageLinkTimePcRelative:
5591 // For kCallCriticalNative we skip loading the method and do the call directly.
5592 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5593 break;
5594 }
5595 FALLTHROUGH_INTENDED;
5596 default: {
5597 LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5598 }
5599 }
5600
5601 switch (invoke->GetCodePtrLocation()) {
5602 case CodePtrLocation::kCallSelf:
5603 DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5604 __ call(GetFrameEntryLabel());
5605 RecordPcInfo(invoke, slow_path);
5606 break;
5607 case CodePtrLocation::kCallCriticalNative: {
5608 size_t out_frame_size =
5609 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5610 kNativeStackAlignment,
5611 GetCriticalNativeDirectCallFrameSize>(invoke);
5612 if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5613 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5614 Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5615 __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5616 RecordBootImageJniEntrypointPatch(invoke);
5617 } else {
5618 // (callee_method + offset_of_jni_entry_point)()
5619 __ call(Address(callee_method.AsRegister<Register>(),
5620 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5621 }
5622 RecordPcInfo(invoke, slow_path);
5623 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5624 // Create space for conversion.
5625 out_frame_size = 8u;
5626 IncreaseFrame(out_frame_size);
5627 }
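      // Note: the native x86-32 ABI returns floating-point values in st(0), while the
      // managed ABI expects them in XMM0, hence the fstp + movss/movsd bounce through
      // the stack slot in the float/double cases below.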
5628 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5629 switch (invoke->GetType()) {
5630 case DataType::Type::kBool:
5631 __ movzxb(EAX, AL);
5632 break;
5633 case DataType::Type::kInt8:
5634 __ movsxb(EAX, AL);
5635 break;
5636 case DataType::Type::kUint16:
5637 __ movzxw(EAX, EAX);
5638 break;
5639 case DataType::Type::kInt16:
5640 __ movsxw(EAX, EAX);
5641 break;
5642 case DataType::Type::kFloat32:
5643 __ fstps(Address(ESP, 0));
5644 __ movss(XMM0, Address(ESP, 0));
5645 break;
5646 case DataType::Type::kFloat64:
5647 __ fstpl(Address(ESP, 0));
5648 __ movsd(XMM0, Address(ESP, 0));
5649 break;
5650 case DataType::Type::kInt32:
5651 case DataType::Type::kInt64:
5652 case DataType::Type::kVoid:
5653 break;
5654 default:
5655 DCHECK(false) << invoke->GetType();
5656 break;
5657 }
5658 if (out_frame_size != 0u) {
5659 DecreaseFrame(out_frame_size);
5660 }
5661 break;
5662 }
5663 case CodePtrLocation::kCallArtMethod:
5664 // (callee_method + offset_of_quick_compiled_code)()
5665 __ call(Address(callee_method.AsRegister<Register>(),
5666 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5667 kX86PointerSize).Int32Value()));
5668 RecordPcInfo(invoke, slow_path);
5669 break;
5670 }
5671
5672 DCHECK(!IsLeafMethod());
5673 }
5674
5675 void CodeGeneratorX86::GenerateVirtualCall(
5676 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5677 Register temp = temp_in.AsRegister<Register>();
5678 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5679 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5680
5681 // Use the calling convention instead of the location of the receiver, as
5682 // intrinsics may have put the receiver in a different register. In the intrinsics
5683 // slow path, the arguments have been moved to the right place, so here we are
5684 // guaranteed that the receiver is the first register of the calling convention.
5685 InvokeDexCallingConvention calling_convention;
5686 Register receiver = calling_convention.GetRegisterAt(0);
5687 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5688 // /* HeapReference<Class> */ temp = receiver->klass_
5689 __ movl(temp, Address(receiver, class_offset));
5690 MaybeRecordImplicitNullCheck(invoke);
5691 // Instead of simply (possibly) unpoisoning `temp` here, we should
5692 // emit a read barrier for the previous class reference load.
5693 // However this is not required in practice, as this is an
5694 // intermediate/temporary reference and because the current
5695 // concurrent copying collector keeps the from-space memory
5696 // intact/accessible until the end of the marking phase (the
5697 // concurrent copying collector may not do so in the future).
5698 __ MaybeUnpoisonHeapReference(temp);
5699
5700 MaybeGenerateInlineCacheCheck(invoke, temp);
5701
5702 // temp = temp->GetMethodAt(method_offset);
5703 __ movl(temp, Address(temp, method_offset));
5704 // call temp->GetEntryPoint();
5705 __ call(Address(
5706 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5707 RecordPcInfo(invoke, slow_path);
5708 }
5709
5710 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5711 uint32_t intrinsic_data) {
5712 boot_image_other_patches_.emplace_back(
5713 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5714 __ Bind(&boot_image_other_patches_.back().label);
5715 }
5716
5717 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5718 uint32_t boot_image_offset) {
5719 boot_image_other_patches_.emplace_back(
5720 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5721 __ Bind(&boot_image_other_patches_.back().label);
5722 }
5723
5724 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5725 size_t index = invoke->IsInvokeInterface()
5726 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5727 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5728 HX86ComputeBaseMethodAddress* method_address =
5729 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5730 boot_image_method_patches_.emplace_back(
5731 method_address,
5732 invoke->GetResolvedMethodReference().dex_file,
5733 invoke->GetResolvedMethodReference().index);
5734 __ Bind(&boot_image_method_patches_.back().label);
5735 }
5736
5737 void CodeGeneratorX86::RecordAppImageMethodPatch(HInvoke* invoke) {
5738 size_t index = invoke->IsInvokeInterface()
5739 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5740 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5741 HX86ComputeBaseMethodAddress* method_address =
5742 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5743 app_image_method_patches_.emplace_back(
5744 method_address,
5745 invoke->GetResolvedMethodReference().dex_file,
5746 invoke->GetResolvedMethodReference().index);
5747 __ Bind(&app_image_method_patches_.back().label);
5748 }
5749
5750 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5751 size_t index = invoke->IsInvokeInterface()
5752 ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5753 : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5754 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5755 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5756 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5757 invoke->GetMethodReference().dex_file));
5758 HX86ComputeBaseMethodAddress* method_address =
5759 invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5760 // Add the patch entry and bind its label at the end of the instruction.
5761 method_bss_entry_patches_.emplace_back(
5762 method_address,
5763 invoke->GetMethodReference().dex_file,
5764 invoke->GetMethodReference().index);
5765 __ Bind(&method_bss_entry_patches_.back().label);
5766 }
5767
5768 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5769 HX86ComputeBaseMethodAddress* method_address =
5770 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5771 boot_image_type_patches_.emplace_back(
5772 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5773 __ Bind(&boot_image_type_patches_.back().label);
5774 }
5775
5776 void CodeGeneratorX86::RecordAppImageTypePatch(HLoadClass* load_class) {
5777 HX86ComputeBaseMethodAddress* method_address =
5778 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5779 app_image_type_patches_.emplace_back(
5780 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5781 __ Bind(&app_image_type_patches_.back().label);
5782 }
5783
5784 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5785 HX86ComputeBaseMethodAddress* method_address =
5786 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5787 ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5788 switch (load_class->GetLoadKind()) {
5789 case HLoadClass::LoadKind::kBssEntry:
5790 patches = &type_bss_entry_patches_;
5791 break;
5792 case HLoadClass::LoadKind::kBssEntryPublic:
5793 patches = &public_type_bss_entry_patches_;
5794 break;
5795 case HLoadClass::LoadKind::kBssEntryPackage:
5796 patches = &package_type_bss_entry_patches_;
5797 break;
5798 default:
5799 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5800 UNREACHABLE();
5801 }
5802 patches->emplace_back(
5803 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5804 return &patches->back().label;
5805 }
5806
5807 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5808 HX86ComputeBaseMethodAddress* method_address =
5809 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5810 boot_image_string_patches_.emplace_back(
5811 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5812 __ Bind(&boot_image_string_patches_.back().label);
5813 }
5814
5815 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5816 HX86ComputeBaseMethodAddress* method_address =
5817 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5818 string_bss_entry_patches_.emplace_back(
5819 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5820 return &string_bss_entry_patches_.back().label;
5821 }
5822
5823 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5824 HX86ComputeBaseMethodAddress* method_address =
5825 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5826 boot_image_jni_entrypoint_patches_.emplace_back(
5827 method_address,
5828 invoke->GetResolvedMethodReference().dex_file,
5829 invoke->GetResolvedMethodReference().index);
5830 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5831 }
5832
5833 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5834 uint32_t boot_image_reference,
5835 HInvokeStaticOrDirect* invoke) {
5836 if (GetCompilerOptions().IsBootImage()) {
5837 HX86ComputeBaseMethodAddress* method_address =
5838 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5839 DCHECK(method_address != nullptr);
5840 Register method_address_reg =
5841 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5842 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5843 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5844 } else if (GetCompilerOptions().GetCompilePic()) {
5845 HX86ComputeBaseMethodAddress* method_address =
5846 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5847 DCHECK(method_address != nullptr);
5848 Register method_address_reg =
5849 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5850 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5851 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5852 } else {
5853 DCHECK(GetCompilerOptions().IsJitCompiler());
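// For JIT compilation the boot image is already mapped, so the absolute address of
// the referenced boot image object can be embedded directly.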
5854 gc::Heap* heap = Runtime::Current()->GetHeap();
5855 DCHECK(!heap->GetBootImageSpaces().empty());
5856 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5857 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5858 }
5859 }
5860
5861 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5862 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5863 if (GetCompilerOptions().IsBootImage()) {
5864 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5865 HX86ComputeBaseMethodAddress* method_address =
5866 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5867 DCHECK(method_address != nullptr);
5868 Register method_address_reg =
5869 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5870 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5871 MethodReference target_method = invoke->GetResolvedMethodReference();
5872 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5873 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5874 __ Bind(&boot_image_type_patches_.back().label);
5875 } else {
5876 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5877 LoadBootImageAddress(reg, boot_image_offset, invoke);
5878 }
5879 }
5880
5881 // The label points to the end of the "movl" (or another) instruction, but the literal offset
5882 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
5883 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5884
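// Helper that converts the recorded X86PcRelativePatchInfo entries into linker patches,
// using the bound label (adjusted by the constant above) as the literal offset and the
// associated HX86ComputeBaseMethodAddress as the PC-relative anchor.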
5885 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5886 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5887 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5888 ArenaVector<linker::LinkerPatch>* linker_patches) {
5889 for (const X86PcRelativePatchInfo& info : infos) {
5890 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5891 linker_patches->push_back(Factory(literal_offset,
5892 info.target_dex_file,
5893 GetMethodAddressOffset(info.method_address),
5894 info.offset_or_index));
5895 }
5896 }
5897
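// Adapter for patch factories that do not take a dex file (e.g. intrinsic and boot
// image relocation patches); it drops the unused dex file argument.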
5898 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5899 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5900 const DexFile* target_dex_file,
5901 uint32_t pc_insn_offset,
5902 uint32_t boot_image_offset) {
5903 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5904 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5905 }
5906
5907 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5908 DCHECK(linker_patches->empty());
5909 size_t size =
5910 boot_image_method_patches_.size() +
5911 app_image_method_patches_.size() +
5912 method_bss_entry_patches_.size() +
5913 boot_image_type_patches_.size() +
5914 app_image_type_patches_.size() +
5915 type_bss_entry_patches_.size() +
5916 public_type_bss_entry_patches_.size() +
5917 package_type_bss_entry_patches_.size() +
5918 boot_image_string_patches_.size() +
5919 string_bss_entry_patches_.size() +
5920 boot_image_jni_entrypoint_patches_.size() +
5921 boot_image_other_patches_.size();
5922 linker_patches->reserve(size);
5923 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5924 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5925 boot_image_method_patches_, linker_patches);
5926 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5927 boot_image_type_patches_, linker_patches);
5928 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5929 boot_image_string_patches_, linker_patches);
5930 } else {
5931 DCHECK(boot_image_method_patches_.empty());
5932 DCHECK(boot_image_type_patches_.empty());
5933 DCHECK(boot_image_string_patches_.empty());
5934 }
5935 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
5936 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
5937 if (GetCompilerOptions().IsBootImage()) {
5938 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5939 boot_image_other_patches_, linker_patches);
5940 } else {
5941 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
5942 boot_image_other_patches_, linker_patches);
5943 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
5944 app_image_method_patches_, linker_patches);
5945 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
5946 app_image_type_patches_, linker_patches);
5947 }
5948 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5949 method_bss_entry_patches_, linker_patches);
5950 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5951 type_bss_entry_patches_, linker_patches);
5952 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5953 public_type_bss_entry_patches_, linker_patches);
5954 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5955 package_type_bss_entry_patches_, linker_patches);
5956 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5957 string_bss_entry_patches_, linker_patches);
5958 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5959 boot_image_jni_entrypoint_patches_, linker_patches);
5960 DCHECK_EQ(size, linker_patches->size());
5961 }
5962
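// Marks the GC card for `object` unless `value` is null. When `emit_null_check` is
// false, the caller is responsible for any null check that may be required.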
5963 void CodeGeneratorX86::MaybeMarkGCCard(
5964 Register temp, Register card, Register object, Register value, bool emit_null_check) {
5965 NearLabel is_null;
5966 if (emit_null_check) {
5967 __ testl(value, value);
5968 __ j(kEqual, &is_null);
5969 }
5970 MarkGCCard(temp, card, object);
5971 if (emit_null_check) {
5972 __ Bind(&is_null);
5973 }
5974 }
5975
5976 void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object) {
5977 // Load the address of the card table into `card`.
5978 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5979 // Calculate the offset (in the card table) of the card corresponding to `object`.
5980 __ movl(temp, object);
5981 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5982 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5983 // `object`'s card.
5984 //
5985 // Register `card` contains the address of the card table. Note that the card
5986 // table's base is biased during its creation so that it always starts at an
5987 // address whose least-significant byte is equal to `kCardDirty` (see
5988 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5989 // below writes the `kCardDirty` (byte) value into the `object`'s card
5990 // (located at `card + object >> kCardShift`).
5991 //
5992 // This dual use of the value in register `card` (1. to calculate the location
5993 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5994 // (no need to explicitly load `kCardDirty` as an immediate value).
5995 __ movb(Address(temp, card, TIMES_1, 0),
5996 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5997 }
5998
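// Verification helper: traps (int3) if the card for `object` is clean while the GC is
// marking (i.e. asserts !clean || !is_gc_marking), used to check that skipping a write
// barrier was valid.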
5999 void CodeGeneratorX86::CheckGCCardIsValid(Register temp, Register card, Register object) {
6000 NearLabel done;
6001 __ j(kEqual, &done);
6002 // Load the address of the card table into `card`.
6003 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
6004 // Calculate the offset (in the card table) of the card corresponding to `object`.
6005 __ movl(temp, object);
6006 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
6007 // assert (!clean || !self->is_gc_marking)
6008 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6009 __ j(kNotEqual, &done);
6010 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), Immediate(0));
6011 __ j(kEqual, &done);
6012 __ int3();
6013 __ Bind(&done);
6014 }
6015
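// Shared by instance and static field gets. Reference loads with read barriers need a
// slow path, and long or read-barrier loads mark the output as overlapping so it is not
// allocated to the same register as the object.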
6016 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
6017 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6018
6019 bool object_field_get_with_read_barrier =
6020 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6021 LocationSummary* locations =
6022 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6023 codegen_->EmitReadBarrier()
6024 ? LocationSummary::kCallOnSlowPath
6025 : LocationSummary::kNoCall);
6026 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
6027 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6028 }
6029 // receiver_input
6030 locations->SetInAt(0, Location::RequiresRegister());
6031 if (DataType::IsFloatingPointType(instruction->GetType())) {
6032 locations->SetOut(Location::RequiresFpuRegister());
6033 } else {
6034 // The output overlaps in case of long: we don't want the low move
6035 // to overwrite the object's location. Likewise, in the case of
6036 // an object field get with read barriers enabled, we do not want
6037 // the move to overwrite the object's location, as we need it to emit
6038 // the read barrier.
6039 locations->SetOut(
6040 Location::RequiresRegister(),
6041 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64)
6042 ? Location::kOutputOverlap
6043 : Location::kNoOutputOverlap);
6044 }
6045
6046 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
6047 // Long values can be loaded atomically into an XMM using movsd.
6048 // So we use an XMM register as a temp to achieve atomicity (first
6049 // load the field value into the XMM temp and then copy the XMM into the
6050 // output, 32 bits at a time).
6051 locations->AddTemp(Location::RequiresFpuRegister());
6052 }
6053 }
6054
6055 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
6056 const FieldInfo& field_info) {
6057 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
6058
6059 LocationSummary* locations = instruction->GetLocations();
6060 Location base_loc = locations->InAt(0);
6061 Register base = base_loc.AsRegister<Register>();
6062 Location out = locations->Out();
6063 bool is_volatile = field_info.IsVolatile();
6064 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
6065 DataType::Type load_type = instruction->GetType();
6066 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6067
6068 if (load_type == DataType::Type::kReference) {
6069 // /* HeapReference<Object> */ out = *(base + offset)
6070 if (codegen_->EmitBakerReadBarrier()) {
6071 // Note that a potential implicit null check is handled in this
6072 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
6073 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6074 instruction, out, base, offset, /* needs_null_check= */ true);
6075 if (is_volatile) {
6076 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6077 }
6078 } else {
6079 __ movl(out.AsRegister<Register>(), Address(base, offset));
6080 codegen_->MaybeRecordImplicitNullCheck(instruction);
6081 if (is_volatile) {
6082 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6083 }
6084 // If read barriers are enabled, emit read barriers other than
6085 // Baker's using a slow path (and also unpoison the loaded
6086 // reference, if heap poisoning is enabled).
6087 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
6088 }
6089 } else {
6090 Address src(base, offset);
6091 XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
6092 ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
6093 : kNoXmmRegister;
6094 codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
6095 if (is_volatile) {
6096 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6097 }
6098 }
6099 }
6100
6101 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
6102 const FieldInfo& field_info,
6103 WriteBarrierKind write_barrier_kind) {
6104 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6105
6106 LocationSummary* locations =
6107 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6108 locations->SetInAt(0, Location::RequiresRegister());
6109 bool is_volatile = field_info.IsVolatile();
6110 DataType::Type field_type = field_info.GetFieldType();
6111 bool is_byte_type = DataType::Size(field_type) == 1u;
6112
6113 // The register allocator does not support multiple
6114 // inputs that die at entry with one in a specific register.
6115 if (is_byte_type) {
6116 // Ensure the value is in a byte register.
6117 locations->SetInAt(1, Location::RegisterLocation(EAX));
6118 } else if (DataType::IsFloatingPointType(field_type)) {
6119 if (is_volatile && field_type == DataType::Type::kFloat64) {
6120 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6121 locations->SetInAt(1, Location::RequiresFpuRegister());
6122 } else {
6123 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
6124 }
6125 } else if (is_volatile && field_type == DataType::Type::kInt64) {
6126 // In order to satisfy the semantics of volatile, this must be a single instruction store.
6127 locations->SetInAt(1, Location::RequiresRegister());
6128
6129 // A 64-bit value can be written atomically to an address with movsd and an XMM register.
6130 // We need two XMM registers because there's no easier way to (bit) copy a register pair
6131 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
6132 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
6133 // isolated cases when we need this it isn't worth adding the extra complexity.
6134 locations->AddTemp(Location::RequiresFpuRegister());
6135 locations->AddTemp(Location::RequiresFpuRegister());
6136 } else {
6137 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6138
6139 bool needs_write_barrier =
6140 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6141 bool check_gc_card =
6142 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
6143
6144 if (needs_write_barrier || check_gc_card) {
6145 locations->AddTemp(Location::RequiresRegister());
6146 // Ensure the card is in a byte register.
6147 locations->AddTemp(Location::RegisterLocation(ECX));
6148 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6149 locations->AddTemp(Location::RequiresRegister());
6150 }
6151 }
6152 }
6153
6154 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6155 uint32_t value_index,
6156 DataType::Type field_type,
6157 Address field_addr,
6158 Register base,
6159 bool is_volatile,
6160 bool value_can_be_null,
6161 WriteBarrierKind write_barrier_kind) {
6162 LocationSummary* locations = instruction->GetLocations();
6163 Location value = locations->InAt(value_index);
6164 bool needs_write_barrier =
6165 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
6166
6167 if (is_volatile) {
6168 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
6169 }
6170
6171 bool maybe_record_implicit_null_check_done = false;
6172
6173 switch (field_type) {
6174 case DataType::Type::kBool:
6175 case DataType::Type::kUint8:
6176 case DataType::Type::kInt8: {
6177 if (value.IsConstant()) {
6178 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6179 } else {
6180 __ movb(field_addr, value.AsRegister<ByteRegister>());
6181 }
6182 break;
6183 }
6184
6185 case DataType::Type::kUint16:
6186 case DataType::Type::kInt16: {
6187 if (value.IsConstant()) {
6188 __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6189 } else {
6190 __ movw(field_addr, value.AsRegister<Register>());
6191 }
6192 break;
6193 }
6194
6195 case DataType::Type::kInt32:
6196 case DataType::Type::kReference: {
6197 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
6198 if (value.IsConstant()) {
6199 DCHECK(value.GetConstant()->IsNullConstant())
6200 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6201 << " is not null. Instruction " << *instruction;
6202 // No need to poison null, just do a movl.
6203 __ movl(field_addr, Immediate(0));
6204 } else {
6205 Register temp = locations->GetTemp(0).AsRegister<Register>();
6206 __ movl(temp, value.AsRegister<Register>());
6207 __ PoisonHeapReference(temp);
6208 __ movl(field_addr, temp);
6209 }
6210 } else if (value.IsConstant()) {
6211 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6212 __ movl(field_addr, Immediate(v));
6213 } else {
6214 DCHECK(value.IsRegister()) << value;
6215 __ movl(field_addr, value.AsRegister<Register>());
6216 }
6217 break;
6218 }
6219
6220 case DataType::Type::kInt64: {
6221 if (is_volatile) {
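// Build the 64-bit value in an XMM register (move each 32-bit half with movd, then
// interleave with punpckldq) so the store below is a single atomic movsd.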
6222 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6223 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6224 __ movd(temp1, value.AsRegisterPairLow<Register>());
6225 __ movd(temp2, value.AsRegisterPairHigh<Register>());
6226 __ punpckldq(temp1, temp2);
6227 __ movsd(field_addr, temp1);
6228 codegen_->MaybeRecordImplicitNullCheck(instruction);
6229 } else if (value.IsConstant()) {
6230 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6231 __ movl(field_addr, Immediate(Low32Bits(v)));
6232 codegen_->MaybeRecordImplicitNullCheck(instruction);
6233 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6234 } else {
6235 __ movl(field_addr, value.AsRegisterPairLow<Register>());
6236 codegen_->MaybeRecordImplicitNullCheck(instruction);
6237 __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6238 }
6239 maybe_record_implicit_null_check_done = true;
6240 break;
6241 }
6242
6243 case DataType::Type::kFloat32: {
6244 if (value.IsConstant()) {
6245 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6246 __ movl(field_addr, Immediate(v));
6247 } else {
6248 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6249 }
6250 break;
6251 }
6252
6253 case DataType::Type::kFloat64: {
6254 if (value.IsConstant()) {
6255 DCHECK(!is_volatile);
6256 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6257 __ movl(field_addr, Immediate(Low32Bits(v)));
6258 codegen_->MaybeRecordImplicitNullCheck(instruction);
6259 __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6260 maybe_record_implicit_null_check_done = true;
6261 } else {
6262 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6263 }
6264 break;
6265 }
6266
6267 case DataType::Type::kUint32:
6268 case DataType::Type::kUint64:
6269 case DataType::Type::kVoid:
6270 LOG(FATAL) << "Unreachable type " << field_type;
6271 UNREACHABLE();
6272 }
6273
6274 if (!maybe_record_implicit_null_check_done) {
6275 codegen_->MaybeRecordImplicitNullCheck(instruction);
6276 }
6277
6278 if (needs_write_barrier) {
6279 Register temp = locations->GetTemp(0).AsRegister<Register>();
6280 Register card = locations->GetTemp(1).AsRegister<Register>();
6281 if (value.IsConstant()) {
6282 DCHECK(value.GetConstant()->IsNullConstant())
6283 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6284 << " is not null. Instruction: " << *instruction;
6285 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6286 codegen_->MarkGCCard(temp, card, base);
6287 }
6288 } else {
6289 codegen_->MaybeMarkGCCard(
6290 temp,
6291 card,
6292 base,
6293 value.AsRegister<Register>(),
6294 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
6295 }
6296 } else if (codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind)) {
6297 if (value.IsConstant()) {
6298 // If we are storing a constant for a reference, we are in the case where we are storing
6299 // null but we cannot skip it as this write barrier is being relied on by coalesced write
6300 // barriers.
6301 DCHECK(value.GetConstant()->IsNullConstant())
6302 << "constant value " << CodeGenerator::GetInt32ValueOf(value.GetConstant())
6303 << " is not null. Instruction: " << *instruction;
6304 // No need to check the dirty bit as this value is null.
6305 } else {
6306 Register temp = locations->GetTemp(0).AsRegister<Register>();
6307 Register card = locations->GetTemp(1).AsRegister<Register>();
6308 codegen_->CheckGCCardIsValid(temp, card, base);
6309 }
6310 }
6311
6312 if (is_volatile) {
6313 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6314 }
6315 }
6316
6317 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6318 const FieldInfo& field_info,
6319 bool value_can_be_null,
6320 WriteBarrierKind write_barrier_kind) {
6321 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6322
6323 LocationSummary* locations = instruction->GetLocations();
6324 Register base = locations->InAt(0).AsRegister<Register>();
6325 bool is_volatile = field_info.IsVolatile();
6326 DataType::Type field_type = field_info.GetFieldType();
6327 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6328 Address field_addr(base, offset);
6329
6330 HandleFieldSet(instruction,
6331 /* value_index= */ 1,
6332 field_type,
6333 field_addr,
6334 base,
6335 is_volatile,
6336 value_can_be_null,
6337 write_barrier_kind);
6338 }
6339
6340 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6341 HandleFieldGet(instruction, instruction->GetFieldInfo());
6342 }
6343
6344 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6345 HandleFieldGet(instruction, instruction->GetFieldInfo());
6346 }
6347
6348 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6349 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6350 }
6351
6352 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6353 HandleFieldSet(instruction,
6354 instruction->GetFieldInfo(),
6355 instruction->GetValueCanBeNull(),
6356 instruction->GetWriteBarrierKind());
6357 }
6358
6359 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6360 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6361 }
6362
6363 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6364 HandleFieldSet(instruction,
6365 instruction->GetFieldInfo(),
6366 instruction->GetValueCanBeNull(),
6367 instruction->GetWriteBarrierKind());
6368 }
6369
6370 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6371 HandleFieldGet(instruction, instruction->GetFieldInfo());
6372 }
6373
6374 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6375 HandleFieldGet(instruction, instruction->GetFieldInfo());
6376 }
6377
6378 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6379 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6380 }
6381
6382 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
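// The format value encodes the kinds of the values to append; it is passed to the
// runtime entrypoint in EAX.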
6383 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6384 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction);
6385 }
6386
6387 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6388 HUnresolvedInstanceFieldGet* instruction) {
6389 FieldAccessCallingConventionX86 calling_convention;
6390 codegen_->CreateUnresolvedFieldLocationSummary(
6391 instruction, instruction->GetFieldType(), calling_convention);
6392 }
6393
6394 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6395 HUnresolvedInstanceFieldGet* instruction) {
6396 FieldAccessCallingConventionX86 calling_convention;
6397 codegen_->GenerateUnresolvedFieldAccess(instruction,
6398 instruction->GetFieldType(),
6399 instruction->GetFieldIndex(),
6400 calling_convention);
6401 }
6402
6403 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6404 HUnresolvedInstanceFieldSet* instruction) {
6405 FieldAccessCallingConventionX86 calling_convention;
6406 codegen_->CreateUnresolvedFieldLocationSummary(
6407 instruction, instruction->GetFieldType(), calling_convention);
6408 }
6409
6410 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6411 HUnresolvedInstanceFieldSet* instruction) {
6412 FieldAccessCallingConventionX86 calling_convention;
6413 codegen_->GenerateUnresolvedFieldAccess(instruction,
6414 instruction->GetFieldType(),
6415 instruction->GetFieldIndex(),
6416 calling_convention);
6417 }
6418
6419 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6420 HUnresolvedStaticFieldGet* instruction) {
6421 FieldAccessCallingConventionX86 calling_convention;
6422 codegen_->CreateUnresolvedFieldLocationSummary(
6423 instruction, instruction->GetFieldType(), calling_convention);
6424 }
6425
6426 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6427 HUnresolvedStaticFieldGet* instruction) {
6428 FieldAccessCallingConventionX86 calling_convention;
6429 codegen_->GenerateUnresolvedFieldAccess(instruction,
6430 instruction->GetFieldType(),
6431 instruction->GetFieldIndex(),
6432 calling_convention);
6433 }
6434
6435 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6436 HUnresolvedStaticFieldSet* instruction) {
6437 FieldAccessCallingConventionX86 calling_convention;
6438 codegen_->CreateUnresolvedFieldLocationSummary(
6439 instruction, instruction->GetFieldType(), calling_convention);
6440 }
6441
6442 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6443 HUnresolvedStaticFieldSet* instruction) {
6444 FieldAccessCallingConventionX86 calling_convention;
6445 codegen_->GenerateUnresolvedFieldAccess(instruction,
6446 instruction->GetFieldType(),
6447 instruction->GetFieldIndex(),
6448 calling_convention);
6449 }
6450
6451 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6452 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6453 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6454 ? Location::RequiresRegister()
6455 : Location::Any();
6456 locations->SetInAt(0, loc);
6457 }
6458
6459 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6460 if (CanMoveNullCheckToUser(instruction)) {
6461 return;
6462 }
6463 LocationSummary* locations = instruction->GetLocations();
6464 Location obj = locations->InAt(0);
6465
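// A single load from the object's address is enough for the implicit check: if `obj`
// is null this faults, and the fault handler converts the SIGSEGV into a
// NullPointerException. EAX is used only because `testl` needs some register; its
// value is irrelevant.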
6466 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6467 RecordPcInfo(instruction);
6468 }
6469
6470 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6471 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6472 AddSlowPath(slow_path);
6473
6474 LocationSummary* locations = instruction->GetLocations();
6475 Location obj = locations->InAt(0);
6476
6477 if (obj.IsRegister()) {
6478 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6479 } else if (obj.IsStackSlot()) {
6480 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6481 } else {
6482 DCHECK(obj.IsConstant()) << obj;
6483 DCHECK(obj.GetConstant()->IsNullConstant());
6484 __ jmp(slow_path->GetEntryLabel());
6485 return;
6486 }
6487 __ j(kEqual, slow_path->GetEntryLabel());
6488 }
6489
6490 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6491 codegen_->GenerateNullCheck(instruction);
6492 }
6493
6494 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6495 bool object_array_get_with_read_barrier =
6496 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
6497 LocationSummary* locations =
6498 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6499 object_array_get_with_read_barrier
6500 ? LocationSummary::kCallOnSlowPath
6501 : LocationSummary::kNoCall);
6502 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6503 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6504 }
6505 locations->SetInAt(0, Location::RequiresRegister());
6506 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6507 if (DataType::IsFloatingPointType(instruction->GetType())) {
6508 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6509 } else {
6510 // The output overlaps in case of long: we don't want the low move
6511 // to overwrite the array's location. Likewise, in the case of an
6512 // object array get with read barriers enabled, we do not want the
6513 // move to overwrite the array's location, as we need it to emit
6514 // the read barrier.
6515 locations->SetOut(
6516 Location::RequiresRegister(),
6517 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6518 ? Location::kOutputOverlap
6519 : Location::kNoOutputOverlap);
6520 }
6521 }
6522
6523 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6524 LocationSummary* locations = instruction->GetLocations();
6525 Location obj_loc = locations->InAt(0);
6526 Register obj = obj_loc.AsRegister<Register>();
6527 Location index = locations->InAt(1);
6528 Location out_loc = locations->Out();
6529 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6530
6531 DataType::Type type = instruction->GetType();
6532 if (type == DataType::Type::kReference) {
6533 static_assert(
6534 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6535 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6536 // /* HeapReference<Object> */ out =
6537 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6538 if (codegen_->EmitBakerReadBarrier()) {
6539 // Note that a potential implicit null check is handled in this
6540 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6541 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6542 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6543 } else {
6544 Register out = out_loc.AsRegister<Register>();
6545 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6546 codegen_->MaybeRecordImplicitNullCheck(instruction);
6547 // If read barriers are enabled, emit read barriers other than
6548 // Baker's using a slow path (and also unpoison the loaded
6549 // reference, if heap poisoning is enabled).
6550 if (index.IsConstant()) {
6551 uint32_t offset =
6552 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6553 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6554 } else {
6555 codegen_->MaybeGenerateReadBarrierSlow(
6556 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6557 }
6558 }
6559 } else if (type == DataType::Type::kUint16
6560 && mirror::kUseStringCompression
6561 && instruction->IsStringCharAt()) {
6562 // Branch to the compressed or uncompressed case depending on the string's compression flag.
6563 Register out = out_loc.AsRegister<Register>();
6564 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6565 NearLabel done, not_compressed;
6566 __ testb(Address(obj, count_offset), Immediate(1));
6567 codegen_->MaybeRecordImplicitNullCheck(instruction);
6568 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6569 "Expecting 0=compressed, 1=uncompressed");
6570 __ j(kNotZero, &not_compressed);
6571 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6572 __ jmp(&done);
6573 __ Bind(&not_compressed);
6574 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6575 __ Bind(&done);
6576 } else {
6577 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6578 Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6579 codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6580 }
6581 }
6582
6583 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6584 DataType::Type value_type = instruction->GetComponentType();
6585
6586 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6587 bool needs_write_barrier =
6588 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6589 bool check_gc_card =
6590 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
6591 bool needs_type_check = instruction->NeedsTypeCheck();
6592
6593 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6594 instruction,
6595 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6596
6597 bool is_byte_type = DataType::Size(value_type) == 1u;
6598 // We need the inputs to be different from the output in the case of a long operation.
6599 // In the case of a byte operation, the register allocator does not support multiple
6600 // inputs that die at entry with one in a specific register.
6601 locations->SetInAt(0, Location::RequiresRegister());
6602 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6603 if (is_byte_type) {
6604 // Ensure the value is in a byte register.
6605 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6606 } else if (DataType::IsFloatingPointType(value_type)) {
6607 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6608 } else {
6609 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6610 }
6611 if (needs_write_barrier || check_gc_card) {
6612 // Used by reference poisoning, type checking, emitting, or checking a write barrier.
6613 locations->AddTemp(Location::RequiresRegister());
6614 // Only used when emitting or checking a write barrier. Ensure the card is in a byte register.
6615 locations->AddTemp(Location::RegisterLocation(ECX));
6616 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
6617 instruction->NeedsTypeCheck()) {
6618 locations->AddTemp(Location::RequiresRegister());
6619 }
6620 }
6621
6622 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6623 LocationSummary* locations = instruction->GetLocations();
6624 Location array_loc = locations->InAt(0);
6625 Register array = array_loc.AsRegister<Register>();
6626 Location index = locations->InAt(1);
6627 Location value = locations->InAt(2);
6628 DataType::Type value_type = instruction->GetComponentType();
6629 bool needs_type_check = instruction->NeedsTypeCheck();
6630 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
6631 bool needs_write_barrier =
6632 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
6633
6634 switch (value_type) {
6635 case DataType::Type::kBool:
6636 case DataType::Type::kUint8:
6637 case DataType::Type::kInt8: {
6638 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6639 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6640 if (value.IsRegister()) {
6641 __ movb(address, value.AsRegister<ByteRegister>());
6642 } else {
6643 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6644 }
6645 codegen_->MaybeRecordImplicitNullCheck(instruction);
6646 break;
6647 }
6648
6649 case DataType::Type::kUint16:
6650 case DataType::Type::kInt16: {
6651 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6652 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6653 if (value.IsRegister()) {
6654 __ movw(address, value.AsRegister<Register>());
6655 } else {
6656 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6657 }
6658 codegen_->MaybeRecordImplicitNullCheck(instruction);
6659 break;
6660 }
6661
6662 case DataType::Type::kReference: {
6663 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6664 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6665
6666 if (!value.IsRegister()) {
6667 // Just setting null.
6668 DCHECK(instruction->InputAt(2)->IsNullConstant());
6669 DCHECK(value.IsConstant()) << value;
6670 __ movl(address, Immediate(0));
6671 codegen_->MaybeRecordImplicitNullCheck(instruction);
6672 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
6673 // We need to set a write barrier here even though we are writing null, since this write
6674 // barrier is being relied on.
6675 DCHECK(needs_write_barrier);
6676 Register temp = locations->GetTemp(0).AsRegister<Register>();
6677 Register card = locations->GetTemp(1).AsRegister<Register>();
6678 codegen_->MarkGCCard(temp, card, array);
6679 }
6680 DCHECK(!needs_type_check);
6681 break;
6682 }
6683
6684 Register register_value = value.AsRegister<Register>();
6685 const bool can_value_be_null = instruction->GetValueCanBeNull();
6686 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
6687 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
6688 // value is null for the type check).
6689 const bool skip_marking_gc_card =
6690 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
6691 NearLabel do_store;
6692 NearLabel skip_writing_card;
6693 if (can_value_be_null) {
6694 __ testl(register_value, register_value);
6695 if (skip_marking_gc_card) {
6696 __ j(kEqual, &skip_writing_card);
6697 } else {
6698 __ j(kEqual, &do_store);
6699 }
6700 }
6701
6702 SlowPathCode* slow_path = nullptr;
6703 if (needs_type_check) {
6704 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6705 codegen_->AddSlowPath(slow_path);
6706
6707 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6708 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6709 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6710
6711 // Note that when Baker read barriers are enabled, the type
6712 // checks are performed without read barriers. This is fine,
6713 // even in the case where a class object is in the from-space
6714 // after the flip, as a comparison involving such a type would
6715 // not produce a false positive; it may of course produce a
6716 // false negative, in which case we would take the ArraySet
6717 // slow path.
6718
6719 Register temp = locations->GetTemp(0).AsRegister<Register>();
6720 // /* HeapReference<Class> */ temp = array->klass_
6721 __ movl(temp, Address(array, class_offset));
6722 codegen_->MaybeRecordImplicitNullCheck(instruction);
6723 __ MaybeUnpoisonHeapReference(temp);
6724
6725 // /* HeapReference<Class> */ temp = temp->component_type_
6726 __ movl(temp, Address(temp, component_offset));
6727 // If heap poisoning is enabled, no need to unpoison `temp`
6728 // nor the object reference in `register_value->klass`, as
6729 // we are comparing two poisoned references.
6730 __ cmpl(temp, Address(register_value, class_offset));
6731
6732 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6733 NearLabel do_put;
6734 __ j(kEqual, &do_put);
6735 // If heap poisoning is enabled, the `temp` reference has
6736 // not been unpoisoned yet; unpoison it now.
6737 __ MaybeUnpoisonHeapReference(temp);
6738
6739 // If heap poisoning is enabled, no need to unpoison the
6740 // heap reference loaded below, as it is only used for a
6741 // comparison with null.
6742 __ cmpl(Address(temp, super_offset), Immediate(0));
6743 __ j(kNotEqual, slow_path->GetEntryLabel());
6744 __ Bind(&do_put);
6745 } else {
6746 __ j(kNotEqual, slow_path->GetEntryLabel());
6747 }
6748 }
6749
6750 if (can_value_be_null && !skip_marking_gc_card) {
6751 DCHECK(do_store.IsLinked());
6752 __ Bind(&do_store);
6753 }
6754
6755 if (needs_write_barrier) {
6756 Register temp = locations->GetTemp(0).AsRegister<Register>();
6757 Register card = locations->GetTemp(1).AsRegister<Register>();
6758 codegen_->MarkGCCard(temp, card, array);
6759 } else if (codegen_->ShouldCheckGCCard(
6760 value_type, instruction->GetValue(), write_barrier_kind)) {
6761 Register temp = locations->GetTemp(0).AsRegister<Register>();
6762 Register card = locations->GetTemp(1).AsRegister<Register>();
6763 codegen_->CheckGCCardIsValid(temp, card, array);
6764 }
6765
6766 if (skip_marking_gc_card) {
6767 // Note that we don't check that the GC card is valid here, as it can legitimately be clean (the value being stored is null).
6768 DCHECK(skip_writing_card.IsLinked());
6769 __ Bind(&skip_writing_card);
6770 }
6771
6772 Register source = register_value;
6773 if (kPoisonHeapReferences) {
6774 Register temp = locations->GetTemp(0).AsRegister<Register>();
6775 __ movl(temp, register_value);
6776 __ PoisonHeapReference(temp);
6777 source = temp;
6778 }
6779
6780 __ movl(address, source);
6781
6782 if (can_value_be_null || !needs_type_check) {
6783 codegen_->MaybeRecordImplicitNullCheck(instruction);
6784 }
6785
6786 if (slow_path != nullptr) {
6787 __ Bind(slow_path->GetExitLabel());
6788 }
6789
6790 break;
6791 }
6792
6793 case DataType::Type::kInt32: {
6794 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6795 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6796 if (value.IsRegister()) {
6797 __ movl(address, value.AsRegister<Register>());
6798 } else {
6799 DCHECK(value.IsConstant()) << value;
6800 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6801 __ movl(address, Immediate(v));
6802 }
6803 codegen_->MaybeRecordImplicitNullCheck(instruction);
6804 break;
6805 }
6806
6807 case DataType::Type::kInt64: {
6808 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6809 if (value.IsRegisterPair()) {
6810 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6811 value.AsRegisterPairLow<Register>());
6812 codegen_->MaybeRecordImplicitNullCheck(instruction);
6813 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6814 value.AsRegisterPairHigh<Register>());
6815 } else {
6816 DCHECK(value.IsConstant());
6817 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6818 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6819 Immediate(Low32Bits(val)));
6820 codegen_->MaybeRecordImplicitNullCheck(instruction);
6821 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6822 Immediate(High32Bits(val)));
6823 }
6824 break;
6825 }
6826
6827 case DataType::Type::kFloat32: {
6828 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6829 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6830 if (value.IsFpuRegister()) {
6831 __ movss(address, value.AsFpuRegister<XmmRegister>());
6832 } else {
6833 DCHECK(value.IsConstant());
6834 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6835 __ movl(address, Immediate(v));
6836 }
6837 codegen_->MaybeRecordImplicitNullCheck(instruction);
6838 break;
6839 }
6840
6841 case DataType::Type::kFloat64: {
6842 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6843 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6844 if (value.IsFpuRegister()) {
6845 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6846 } else {
6847 DCHECK(value.IsConstant());
6848 Address address_hi =
6849 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6850 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6851 __ movl(address, Immediate(Low32Bits(v)));
6852 codegen_->MaybeRecordImplicitNullCheck(instruction);
6853 __ movl(address_hi, Immediate(High32Bits(v)));
6854 }
6855 break;
6856 }
6857
6858 case DataType::Type::kUint32:
6859 case DataType::Type::kUint64:
6860 case DataType::Type::kVoid:
6861 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6862 UNREACHABLE();
6863 }
6864 }
6865
6866 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6867 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6868 locations->SetInAt(0, Location::RequiresRegister());
6869 if (!instruction->IsEmittedAtUseSite()) {
6870 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6871 }
6872 }
6873
6874 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6875 if (instruction->IsEmittedAtUseSite()) {
6876 return;
6877 }
6878
6879 LocationSummary* locations = instruction->GetLocations();
6880 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6881 Register obj = locations->InAt(0).AsRegister<Register>();
6882 Register out = locations->Out().AsRegister<Register>();
6883 __ movl(out, Address(obj, offset));
6884 codegen_->MaybeRecordImplicitNullCheck(instruction);
6885 // Mask out the most significant bit in case the array is a String's array of char.
6886 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6887 __ shrl(out, Immediate(1));
6888 }
6889 }
6890
6891 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6892 RegisterSet caller_saves = RegisterSet::Empty();
6893 InvokeRuntimeCallingConvention calling_convention;
6894 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6895 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6896 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6897 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6898 HInstruction* length = instruction->InputAt(1);
6899 if (!length->IsEmittedAtUseSite()) {
6900 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6901 }
6902 // Need a register to see the array's length.
6903 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6904 locations->AddTemp(Location::RequiresRegister());
6905 }
6906 }
6907
6908 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6909 const bool is_string_compressed_char_at =
6910 mirror::kUseStringCompression && instruction->IsStringCharAt();
6911 LocationSummary* locations = instruction->GetLocations();
6912 Location index_loc = locations->InAt(0);
6913 Location length_loc = locations->InAt(1);
6914 SlowPathCode* slow_path =
6915 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6916
6917 if (length_loc.IsConstant()) {
6918 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6919 if (index_loc.IsConstant()) {
6920 // BCE will remove the bounds check if we are guaranteed to pass.
6921 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6922 if (index < 0 || index >= length) {
6923 codegen_->AddSlowPath(slow_path);
6924 __ jmp(slow_path->GetEntryLabel());
6925 } else {
6926 // Some optimization after BCE may have generated this, and we should not
6927 // generate a bounds check if it is a valid range.
6928 }
6929 return;
6930 }
6931
6932 // We have to reverse the jump condition because the length is the constant.
6933 Register index_reg = index_loc.AsRegister<Register>();
6934 __ cmpl(index_reg, Immediate(length));
6935 codegen_->AddSlowPath(slow_path);
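// The unsigned comparison also catches a negative index, which wraps to a large unsigned value.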
6936 __ j(kAboveEqual, slow_path->GetEntryLabel());
6937 } else {
6938 HInstruction* array_length = instruction->InputAt(1);
6939 if (array_length->IsEmittedAtUseSite()) {
6940 // Address the length field in the array.
6941 DCHECK(array_length->IsArrayLength());
6942 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6943 Location array_loc = array_length->GetLocations()->InAt(0);
6944 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6945 if (is_string_compressed_char_at) {
6946 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6947 // the string compression flag) with the in-memory length and avoid the temporary.
6948 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6949 __ movl(length_reg, array_len);
6950 codegen_->MaybeRecordImplicitNullCheck(array_length);
6951 __ shrl(length_reg, Immediate(1));
6952 codegen_->GenerateIntCompare(length_reg, index_loc);
6953 } else {
6954 // Checking bounds for general case:
6955 // Array of char, or a String's array with the compression feature off.
6956 if (index_loc.IsConstant()) {
6957 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6958 __ cmpl(array_len, Immediate(value));
6959 } else {
6960 __ cmpl(array_len, index_loc.AsRegister<Register>());
6961 }
6962 codegen_->MaybeRecordImplicitNullCheck(array_length);
6963 }
6964 } else {
6965 codegen_->GenerateIntCompare(length_loc, index_loc);
6966 }
6967 codegen_->AddSlowPath(slow_path);
6968 __ j(kBelowEqual, slow_path->GetEntryLabel());
6969 }
6970 }
6971
6972 void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6973 LOG(FATAL) << "Unreachable";
6974 }
6975
6976 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6977 if (instruction->GetNext()->IsSuspendCheck() &&
6978 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6979 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6980 // The back edge will generate the suspend check.
6981 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6982 }
6983
6984 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6985 }
6986
6987 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6988 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6989 instruction, LocationSummary::kCallOnSlowPath);
6990 // In the suspend check slow path, there are usually no caller-save registers at all.
6991 // If SIMD instructions are present, however, we force spilling all live SIMD
6992 // registers at full width (since the runtime only saves/restores the lower part).
6993 locations->SetCustomSlowPathCallerSaves(
6994 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6995 }
6996
6997 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6998 HBasicBlock* block = instruction->GetBlock();
6999 if (block->GetLoopInformation() != nullptr) {
7000 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
7001 // The back edge will generate the suspend check.
7002 return;
7003 }
7004 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7005 // The goto will generate the suspend check.
7006 return;
7007 }
7008 GenerateSuspendCheck(instruction, nullptr);
7009 }
7010
7011 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
7012 HBasicBlock* successor) {
7013 SuspendCheckSlowPathX86* slow_path =
7014 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
7015 if (slow_path == nullptr) {
7016 slow_path =
7017 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
7018 instruction->SetSlowPath(slow_path);
7019 codegen_->AddSlowPath(slow_path);
7020 if (successor != nullptr) {
7021 DCHECK(successor->IsLoopHeader());
7022 }
7023 } else {
7024 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7025 }
7026
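// Test the thread's state-and-flags word for a pending suspend or checkpoint request.
// Without a successor, branch to the slow path on a pending request and resume at the return
// label; with a successor (loop back edge), jump straight to the loop header when there is no
// request, otherwise fall through into the slow path.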
7027 __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
7028 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
7029 if (successor == nullptr) {
7030 __ j(kNotZero, slow_path->GetEntryLabel());
7031 __ Bind(slow_path->GetReturnLabel());
7032 } else {
7033 __ j(kZero, codegen_->GetLabelOf(successor));
7034 __ jmp(slow_path->GetEntryLabel());
7035 }
7036 }
7037
7038 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
7039 return codegen_->GetAssembler();
7040 }
7041
7042 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
7043 ScratchRegisterScope ensure_scratch(
7044 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7045 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7046 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7047
7048 // Now that temp register is available (possibly spilled), move blocks of memory.
7049 for (int i = 0; i < number_of_words; i++) {
7050 __ movl(temp_reg, Address(ESP, src + stack_offset));
7051 __ movl(Address(ESP, dst + stack_offset), temp_reg);
7052 stack_offset += kX86WordSize;
7053 }
7054 }
7055
7056 void ParallelMoveResolverX86::EmitMove(size_t index) {
7057 MoveOperands* move = moves_[index];
7058 Location source = move->GetSource();
7059 Location destination = move->GetDestination();
7060
7061 if (source.IsRegister()) {
7062 if (destination.IsRegister()) {
7063 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7064 } else if (destination.IsFpuRegister()) {
7065 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
7066 } else {
7067 DCHECK(destination.IsStackSlot());
7068 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
7069 }
7070 } else if (source.IsRegisterPair()) {
7071 if (destination.IsRegisterPair()) {
7072 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
7073 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
7074 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
7075 } else if (destination.IsFpuRegister()) {
7076 size_t elem_size = DataType::Size(DataType::Type::kInt32);
7077 // Push the 2 source registers to the stack.
7078 __ pushl(source.AsRegisterPairHigh<Register>());
7079 __ cfi().AdjustCFAOffset(elem_size);
7080 __ pushl(source.AsRegisterPairLow<Register>());
7081 __ cfi().AdjustCFAOffset(elem_size);
7082 // Load the destination register.
7083 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
7084 // And remove the temporary stack space we allocated.
7085 codegen_->DecreaseFrame(2 * elem_size);
7086 } else {
7087 DCHECK(destination.IsDoubleStackSlot());
7088 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
7089 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
7090 source.AsRegisterPairHigh<Register>());
7091 }
7092 } else if (source.IsFpuRegister()) {
7093 if (destination.IsRegister()) {
7094 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
7095 } else if (destination.IsFpuRegister()) {
7096 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7097 } else if (destination.IsRegisterPair()) {
7098 size_t elem_size = DataType::Size(DataType::Type::kInt32);
7099 // Create stack space for 2 elements.
7100 codegen_->IncreaseFrame(2 * elem_size);
7101 // Store the source register.
7102 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
7103 // And pop the values into destination registers.
7104 __ popl(destination.AsRegisterPairLow<Register>());
7105 __ cfi().AdjustCFAOffset(-elem_size);
7106 __ popl(destination.AsRegisterPairHigh<Register>());
7107 __ cfi().AdjustCFAOffset(-elem_size);
7108 } else if (destination.IsStackSlot()) {
7109 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7110 } else if (destination.IsDoubleStackSlot()) {
7111 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7112 } else {
7113 DCHECK(destination.IsSIMDStackSlot());
7114 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
7115 }
7116 } else if (source.IsStackSlot()) {
7117 if (destination.IsRegister()) {
7118 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
7119 } else if (destination.IsFpuRegister()) {
7120 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7121 } else {
7122 DCHECK(destination.IsStackSlot());
7123 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7124 }
7125 } else if (source.IsDoubleStackSlot()) {
7126 if (destination.IsRegisterPair()) {
7127 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
7128 __ movl(destination.AsRegisterPairHigh<Register>(),
7129 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
7130 } else if (destination.IsFpuRegister()) {
7131 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7132 } else {
7133 DCHECK(destination.IsDoubleStackSlot()) << destination;
7134 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7135 }
7136 } else if (source.IsSIMDStackSlot()) {
7137 if (destination.IsFpuRegister()) {
7138 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
7139 } else {
7140 DCHECK(destination.IsSIMDStackSlot());
7141 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7142 }
7143 } else if (source.IsConstant()) {
7144 HConstant* constant = source.GetConstant();
7145 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7146 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7147 if (destination.IsRegister()) {
7148 if (value == 0) {
7149 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
7150 } else {
7151 __ movl(destination.AsRegister<Register>(), Immediate(value));
7152 }
7153 } else {
7154 DCHECK(destination.IsStackSlot()) << destination;
7155 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
7156 }
7157 } else if (constant->IsFloatConstant()) {
7158 float fp_value = constant->AsFloatConstant()->GetValue();
7159 int32_t value = bit_cast<int32_t, float>(fp_value);
7160 Immediate imm(value);
7161 if (destination.IsFpuRegister()) {
7162 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7163 if (value == 0) {
7164 // Easy handling of 0.0.
7165 __ xorps(dest, dest);
7166 } else {
7167 ScratchRegisterScope ensure_scratch(
7168 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7169 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
7170 __ movl(temp, Immediate(value));
7171 __ movd(dest, temp);
7172 }
7173 } else {
7174 DCHECK(destination.IsStackSlot()) << destination;
7175 __ movl(Address(ESP, destination.GetStackIndex()), imm);
7176 }
7177 } else if (constant->IsLongConstant()) {
7178 int64_t value = constant->AsLongConstant()->GetValue();
7179 int32_t low_value = Low32Bits(value);
7180 int32_t high_value = High32Bits(value);
7181 Immediate low(low_value);
7182 Immediate high(high_value);
7183 if (destination.IsDoubleStackSlot()) {
7184 __ movl(Address(ESP, destination.GetStackIndex()), low);
7185 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7186 } else {
7187 __ movl(destination.AsRegisterPairLow<Register>(), low);
7188 __ movl(destination.AsRegisterPairHigh<Register>(), high);
7189 }
7190 } else {
7191 DCHECK(constant->IsDoubleConstant());
7192 double dbl_value = constant->AsDoubleConstant()->GetValue();
7193 int64_t value = bit_cast<int64_t, double>(dbl_value);
7194 int32_t low_value = Low32Bits(value);
7195 int32_t high_value = High32Bits(value);
7196 Immediate low(low_value);
7197 Immediate high(high_value);
7198 if (destination.IsFpuRegister()) {
7199 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
7200 if (value == 0) {
7201 // Easy handling of 0.0.
7202 __ xorpd(dest, dest);
7203 } else {
7204 __ pushl(high);
7205 __ cfi().AdjustCFAOffset(4);
7206 __ pushl(low);
7207 __ cfi().AdjustCFAOffset(4);
7208 __ movsd(dest, Address(ESP, 0));
7209 codegen_->DecreaseFrame(8);
7210 }
7211 } else {
7212 DCHECK(destination.IsDoubleStackSlot()) << destination;
7213 __ movl(Address(ESP, destination.GetStackIndex()), low);
7214 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
7215 }
7216 }
7217 } else {
7218 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
7219 }
7220 }
7221
7222 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7223 Register suggested_scratch = reg == EAX ? EBX : EAX;
7224 ScratchRegisterScope ensure_scratch(
7225 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7226
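// If the scratch register had to be spilled, it was pushed on the stack, so ESP-relative
// offsets into the frame are shifted by one word.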
7227 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7228 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7229 __ movl(Address(ESP, mem + stack_offset), reg);
7230 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7231 }
7232
7233 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7234 ScratchRegisterScope ensure_scratch(
7235 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7236
7237 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7238 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7239 __ movl(temp_reg, Address(ESP, mem + stack_offset));
7240 __ movss(Address(ESP, mem + stack_offset), reg);
7241 __ movd(reg, temp_reg);
7242 }
7243
7244 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
7245 size_t extra_slot = 4 * kX86WordSize;
7246 codegen_->IncreaseFrame(extra_slot);
7247 __ movups(Address(ESP, 0), XmmRegister(reg));
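// `mem` was an ESP-relative offset before the frame grew by `extra_slot`, so compensate for
// the shift when exchanging the memory blocks.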
7248 ExchangeMemory(0, mem + extra_slot, 4);
7249 __ movups(XmmRegister(reg), Address(ESP, 0));
7250 codegen_->DecreaseFrame(extra_slot);
7251 }
7252
7253 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7254 ScratchRegisterScope ensure_scratch1(
7255 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7256
7257 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7258 ScratchRegisterScope ensure_scratch2(
7259 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7260
7261 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7262 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
7263
7264 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7265 for (int i = 0; i < number_of_words; i++) {
7266 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7267 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7268 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7269 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7270 stack_offset += kX86WordSize;
7271 }
7272 }
7273
7274 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7275 MoveOperands* move = moves_[index];
7276 Location source = move->GetSource();
7277 Location destination = move->GetDestination();
7278
7279 if (source.IsRegister() && destination.IsRegister()) {
7280 // Use the XOR swap algorithm to avoid the serializing XCHG instruction or a temporary register.
7281 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
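// Three XORs swap the two registers in place: a ^= b; b ^= a; a ^= b.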
7282 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7283 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7284 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7285 } else if (source.IsRegister() && destination.IsStackSlot()) {
7286 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7287 } else if (source.IsStackSlot() && destination.IsRegister()) {
7288 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7289 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7290 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7291 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7292 // Use the XOR swap algorithm to avoid a temporary.
7293 DCHECK_NE(source.reg(), destination.reg());
7294 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7295 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7296 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7297 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7298 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7299 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7300 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7301 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7302 // Take advantage of the 16 bytes in the XMM register.
7303 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7304 Address stack(ESP, destination.GetStackIndex());
7305 // Load the double into the high doubleword.
7306 __ movhpd(reg, stack);
7307
7308 // Store the low double into the destination.
7309 __ movsd(stack, reg);
7310
7311 // Move the high double to the low double.
7312 __ psrldq(reg, Immediate(8));
7313 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7314 // Take advantage of the 16 bytes in the XMM register.
7315 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7316 Address stack(ESP, source.GetStackIndex());
7317 // Load the double into the high doubleword.
7318 __ movhpd(reg, stack);
7319
7320 // Store the low double into the destination.
7321 __ movsd(stack, reg);
7322
7323 // Move the high double to the low double.
7324 __ psrldq(reg, Immediate(8));
7325 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7326 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7327 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7328 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7329 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7330 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7331 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7332 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7333 } else {
7334 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7335 }
7336 }
7337
7338 void ParallelMoveResolverX86::SpillScratch(int reg) {
7339 __ pushl(static_cast<Register>(reg));
7340 }
7341
7342 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7343 __ popl(static_cast<Register>(reg));
7344 }
7345
7346 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7347 HLoadClass::LoadKind desired_class_load_kind) {
7348 switch (desired_class_load_kind) {
7349 case HLoadClass::LoadKind::kInvalid:
7350 LOG(FATAL) << "UNREACHABLE";
7351 UNREACHABLE();
7352 case HLoadClass::LoadKind::kReferrersClass:
7353 break;
7354 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7355 case HLoadClass::LoadKind::kBootImageRelRo:
7356 case HLoadClass::LoadKind::kAppImageRelRo:
7357 case HLoadClass::LoadKind::kBssEntry:
7358 case HLoadClass::LoadKind::kBssEntryPublic:
7359 case HLoadClass::LoadKind::kBssEntryPackage:
7360 DCHECK(!GetCompilerOptions().IsJitCompiler());
7361 break;
7362 case HLoadClass::LoadKind::kJitBootImageAddress:
7363 case HLoadClass::LoadKind::kJitTableAddress:
7364 DCHECK(GetCompilerOptions().IsJitCompiler());
7365 break;
7366 case HLoadClass::LoadKind::kRuntimeCall:
7367 break;
7368 }
7369 return desired_class_load_kind;
7370 }
7371
7372 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7373 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7374 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7375 InvokeRuntimeCallingConvention calling_convention;
7376 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7377 cls,
7378 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7379 Location::RegisterLocation(EAX));
7380 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7381 return;
7382 }
7383 DCHECK_EQ(cls->NeedsAccessCheck(),
7384 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7385 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7386
7387 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
7388 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7389 ? LocationSummary::kCallOnSlowPath
7390 : LocationSummary::kNoCall;
7391 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7392 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7393 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7394 }
7395
7396 if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7397 locations->SetInAt(0, Location::RequiresRegister());
7398 }
7399 locations->SetOut(Location::RequiresRegister());
7400 if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7401 if (codegen_->EmitNonBakerReadBarrier()) {
7402 // For non-Baker read barrier we have a temp-clobbering call.
7403 } else {
7404 // Rely on the type resolution and/or initialization to save everything.
7405 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7406 }
7407 }
7408 }
7409
7410 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7411 dex::TypeIndex type_index,
7412 Handle<mirror::Class> handle) {
7413 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7414 // Add a patch entry and return the label.
7415 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7416 PatchInfo<Label>* info = &jit_class_patches_.back();
7417 return &info->label;
7418 }
7419
7420 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7421 // move.
7422 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7423 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7424 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7425 codegen_->GenerateLoadClassRuntimeCall(cls);
7426 return;
7427 }
7428 DCHECK_EQ(cls->NeedsAccessCheck(),
7429 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7430 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7431
7432 LocationSummary* locations = cls->GetLocations();
7433 Location out_loc = locations->Out();
7434 Register out = out_loc.AsRegister<Register>();
7435
7436 bool generate_null_check = false;
7437 const ReadBarrierOption read_barrier_option =
7438 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
7439 switch (load_kind) {
7440 case HLoadClass::LoadKind::kReferrersClass: {
7441 DCHECK(!cls->CanCallRuntime());
7442 DCHECK(!cls->MustGenerateClinitCheck());
7443 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7444 Register current_method = locations->InAt(0).AsRegister<Register>();
7445 GenerateGcRootFieldLoad(
7446 cls,
7447 out_loc,
7448 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7449 /* fixup_label= */ nullptr,
7450 read_barrier_option);
7451 break;
7452 }
7453 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7454 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7455 codegen_->GetCompilerOptions().IsBootImageExtension());
7456 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7457 Register method_address = locations->InAt(0).AsRegister<Register>();
7458 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7459 codegen_->RecordBootImageTypePatch(cls);
7460 break;
7461 }
7462 case HLoadClass::LoadKind::kBootImageRelRo: {
7463 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7464 Register method_address = locations->InAt(0).AsRegister<Register>();
7465 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7466 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7467 CodeGenerator::GetBootImageOffset(cls));
7468 break;
7469 }
7470 case HLoadClass::LoadKind::kAppImageRelRo: {
7471 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
7472 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7473 Register method_address = locations->InAt(0).AsRegister<Register>();
7474 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7475 codegen_->RecordAppImageTypePatch(cls);
7476 break;
7477 }
7478 case HLoadClass::LoadKind::kBssEntry:
7479 case HLoadClass::LoadKind::kBssEntryPublic:
7480 case HLoadClass::LoadKind::kBssEntryPackage: {
7481 Register method_address = locations->InAt(0).AsRegister<Register>();
7482 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7483 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7484 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7485 // No need for memory fence, thanks to the x86 memory model.
7486 generate_null_check = true;
7487 break;
7488 }
7489 case HLoadClass::LoadKind::kJitBootImageAddress: {
7490 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7491 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7492 DCHECK_NE(address, 0u);
7493 __ movl(out, Immediate(address));
7494 break;
7495 }
7496 case HLoadClass::LoadKind::kJitTableAddress: {
7497 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7498 Label* fixup_label = codegen_->NewJitRootClassPatch(
7499 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7500 // /* GcRoot<mirror::Class> */ out = *address
7501 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7502 break;
7503 }
7504 case HLoadClass::LoadKind::kRuntimeCall:
7505 case HLoadClass::LoadKind::kInvalid:
7506 LOG(FATAL) << "UNREACHABLE";
7507 UNREACHABLE();
7508 }
7509
7510 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7511 DCHECK(cls->CanCallRuntime());
7512 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7513 codegen_->AddSlowPath(slow_path);
7514
7515 if (generate_null_check) {
7516 __ testl(out, out);
7517 __ j(kEqual, slow_path->GetEntryLabel());
7518 }
7519
7520 if (cls->MustGenerateClinitCheck()) {
7521 GenerateClassInitializationCheck(slow_path, out);
7522 } else {
7523 __ Bind(slow_path->GetExitLabel());
7524 }
7525 }
7526 }
7527
7528 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7529 InvokeRuntimeCallingConvention calling_convention;
7530 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7531 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7532 }
7533
7534 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7535 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7536 }
7537
7538 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7539 InvokeRuntimeCallingConvention calling_convention;
7540 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7541 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7542 }
7543
7544 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7545 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7546 }
7547
7548 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7549 LocationSummary* locations =
7550 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7551 locations->SetInAt(0, Location::RequiresRegister());
7552 if (check->HasUses()) {
7553 locations->SetOut(Location::SameAsFirstInput());
7554 }
7555 // Rely on the type initialization to save everything we need.
7556 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7557 }
7558
7559 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7560 // We assume the class is not null.
7561 SlowPathCode* slow_path =
7562 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7563 codegen_->AddSlowPath(slow_path);
7564 GenerateClassInitializationCheck(slow_path,
7565 check->GetLocations()->InAt(0).AsRegister<Register>());
7566 }
7567
7568 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7569 SlowPathCode* slow_path, Register class_reg) {
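// Take the slow path unless the class status byte is at least `kShiftedVisiblyInitializedValue`.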
7570 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
7571 __ j(kBelow, slow_path->GetEntryLabel());
7572 __ Bind(slow_path->GetExitLabel());
7573 }
7574
7575 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7576 Register temp) {
7577 uint32_t path_to_root = check->GetBitstringPathToRoot();
7578 uint32_t mask = check->GetBitstringMask();
7579 DCHECK(IsPowerOfTwo(mask + 1));
7580 size_t mask_bits = WhichPowerOf2(mask + 1);
7581
7582 if (mask_bits == 16u) {
7583 // Compare the bitstring in memory.
7584 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7585 } else {
7586 // /* uint32_t */ temp = temp->status_
7587 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7588 // Compare the bitstring bits using SUB.
7589 __ subl(temp, Immediate(path_to_root));
7590 // Shift out bits that do not contribute to the comparison.
7591 __ shll(temp, Immediate(32u - mask_bits));
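// The caller branches on the flags from this SUB/SHL sequence: the result is zero exactly when
// the low `mask_bits` bits of the status word equal `path_to_root`.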
7592 }
7593 }
7594
7595 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7596 HLoadString::LoadKind desired_string_load_kind) {
7597 switch (desired_string_load_kind) {
7598 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7599 case HLoadString::LoadKind::kBootImageRelRo:
7600 case HLoadString::LoadKind::kBssEntry:
7601 DCHECK(!GetCompilerOptions().IsJitCompiler());
7602 break;
7603 case HLoadString::LoadKind::kJitBootImageAddress:
7604 case HLoadString::LoadKind::kJitTableAddress:
7605 DCHECK(GetCompilerOptions().IsJitCompiler());
7606 break;
7607 case HLoadString::LoadKind::kRuntimeCall:
7608 break;
7609 }
7610 return desired_string_load_kind;
7611 }
7612
7613 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7614 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
7615 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7616 HLoadString::LoadKind load_kind = load->GetLoadKind();
7617 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7618 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7619 load_kind == HLoadString::LoadKind::kBssEntry) {
7620 locations->SetInAt(0, Location::RequiresRegister());
7621 }
7622 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7623 locations->SetOut(Location::RegisterLocation(EAX));
7624 } else {
7625 locations->SetOut(Location::RequiresRegister());
7626 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7627 if (codegen_->EmitNonBakerReadBarrier()) {
7628 // For non-Baker read barrier we have a temp-clobbering call.
7629 } else {
7630 // Rely on the pResolveString to save everything.
7631 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7632 }
7633 }
7634 }
7635 }
7636
7637 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7638 dex::StringIndex string_index,
7639 Handle<mirror::String> handle) {
7640 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7641 // Add a patch entry and return the label.
7642 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7643 PatchInfo<Label>* info = &jit_string_patches_.back();
7644 return &info->label;
7645 }
7646
7647 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7648 // move.
7649 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7650 LocationSummary* locations = load->GetLocations();
7651 Location out_loc = locations->Out();
7652 Register out = out_loc.AsRegister<Register>();
7653
7654 switch (load->GetLoadKind()) {
7655 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7656 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7657 codegen_->GetCompilerOptions().IsBootImageExtension());
7658 Register method_address = locations->InAt(0).AsRegister<Register>();
7659 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7660 codegen_->RecordBootImageStringPatch(load);
7661 return;
7662 }
7663 case HLoadString::LoadKind::kBootImageRelRo: {
7664 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7665 Register method_address = locations->InAt(0).AsRegister<Register>();
7666 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7667 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7668 CodeGenerator::GetBootImageOffset(load));
7669 return;
7670 }
7671 case HLoadString::LoadKind::kBssEntry: {
7672 Register method_address = locations->InAt(0).AsRegister<Register>();
7673 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7674 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7675 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7676 GenerateGcRootFieldLoad(
7677 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7678 // No need for memory fence, thanks to the x86 memory model.
7679 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7680 codegen_->AddSlowPath(slow_path);
7681 __ testl(out, out);
7682 __ j(kEqual, slow_path->GetEntryLabel());
7683 __ Bind(slow_path->GetExitLabel());
7684 return;
7685 }
7686 case HLoadString::LoadKind::kJitBootImageAddress: {
7687 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7688 DCHECK_NE(address, 0u);
7689 __ movl(out, Immediate(address));
7690 return;
7691 }
7692 case HLoadString::LoadKind::kJitTableAddress: {
7693 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7694 Label* fixup_label = codegen_->NewJitRootStringPatch(
7695 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7696 // /* GcRoot<mirror::String> */ out = *address
7697 GenerateGcRootFieldLoad(
7698 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7699 return;
7700 }
7701 default:
7702 break;
7703 }
7704
7705 InvokeRuntimeCallingConvention calling_convention;
7706 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7707 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7708 codegen_->InvokeRuntime(kQuickResolveString, load);
7709 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7710 }
7711
7712 static Address GetExceptionTlsAddress() {
7713 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7714 }
7715
7716 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7717 LocationSummary* locations =
7718 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7719 locations->SetOut(Location::RequiresRegister());
7720 }
7721
7722 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7723 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7724 }
7725
7726 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7727 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7728 }
7729
7730 void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
7731 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7732 }
7733
7734 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7735 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7736 instruction, LocationSummary::kCallOnMainOnly);
7737 InvokeRuntimeCallingConvention calling_convention;
7738 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7739 }
7740
7741 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7742 codegen_->InvokeRuntime(kQuickDeliverException, instruction);
7743 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7744 }
7745
7746 // A temp is used for the interface check and for the non-Baker read barrier cases.
7747 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7748 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7749 return 1;
7750 }
7751 if (emit_read_barrier &&
7752 !kUseBakerReadBarrier &&
7753 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7754 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7755 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7756 return 1;
7757 }
7758 return 0;
7759 }
7760
7761 // The interface case has 2 temps: one for holding the number of interfaces and one for the
7762 // current interface pointer; the current interface is compared in memory.
7763 // The other checks have one temp for loading the object's class.
7764 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7765 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7766 }
7767
7768 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7769 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7770 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7771 bool baker_read_barrier_slow_path = false;
7772 switch (type_check_kind) {
7773 case TypeCheckKind::kExactCheck:
7774 case TypeCheckKind::kAbstractClassCheck:
7775 case TypeCheckKind::kClassHierarchyCheck:
7776 case TypeCheckKind::kArrayObjectCheck:
7777 case TypeCheckKind::kInterfaceCheck: {
7778 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7779 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7780 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7781 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7782 break;
7783 }
7784 case TypeCheckKind::kArrayCheck:
7785 case TypeCheckKind::kUnresolvedCheck:
7786 call_kind = LocationSummary::kCallOnSlowPath;
7787 break;
7788 case TypeCheckKind::kBitstringCheck:
7789 break;
7790 }
7791
7792 LocationSummary* locations =
7793 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7794 if (baker_read_barrier_slow_path) {
7795 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7796 }
7797 locations->SetInAt(0, Location::RequiresRegister());
7798 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7799 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7800 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7801 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7802 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7803 locations->SetInAt(1, Location::RequiresRegister());
7804 } else {
7805 locations->SetInAt(1, Location::Any());
7806 }
7807 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7808 locations->SetOut(Location::RequiresRegister());
7809 // When read barriers are enabled, we need a temporary register in some cases.
7810 locations->AddRegisterTemps(
7811 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7812 }
7813
7814 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7815 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7816 LocationSummary* locations = instruction->GetLocations();
7817 Location obj_loc = locations->InAt(0);
7818 Register obj = obj_loc.AsRegister<Register>();
7819 Location cls = locations->InAt(1);
7820 Location out_loc = locations->Out();
7821 Register out = out_loc.AsRegister<Register>();
7822 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7823 DCHECK_LE(num_temps, 1u);
7824 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7825 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7826 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7827 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7828 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7829 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7830 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7831 const uint32_t object_array_data_offset =
7832 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7833 SlowPathCode* slow_path = nullptr;
7834 NearLabel done, zero;
7835
7836 // Return 0 if `obj` is null.
7837 // Avoid null check if we know obj is not null.
7838 if (instruction->MustDoNullCheck()) {
7839 __ testl(obj, obj);
7840 __ j(kEqual, &zero);
7841 }
7842
7843 switch (type_check_kind) {
7844 case TypeCheckKind::kExactCheck: {
7845 ReadBarrierOption read_barrier_option =
7846 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7847 // /* HeapReference<Class> */ out = obj->klass_
7848 GenerateReferenceLoadTwoRegisters(instruction,
7849 out_loc,
7850 obj_loc,
7851 class_offset,
7852 read_barrier_option);
7853 if (cls.IsRegister()) {
7854 __ cmpl(out, cls.AsRegister<Register>());
7855 } else {
7856 DCHECK(cls.IsStackSlot()) << cls;
7857 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7858 }
7859
7860 // Classes must be equal for the instanceof to succeed.
7861 __ j(kNotEqual, &zero);
7862 __ movl(out, Immediate(1));
7863 __ jmp(&done);
7864 break;
7865 }
7866
7867 case TypeCheckKind::kAbstractClassCheck: {
7868 ReadBarrierOption read_barrier_option =
7869 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7870 // /* HeapReference<Class> */ out = obj->klass_
7871 GenerateReferenceLoadTwoRegisters(instruction,
7872 out_loc,
7873 obj_loc,
7874 class_offset,
7875 read_barrier_option);
7876 // If the class is abstract, we eagerly fetch the super class of the
7877 // object to avoid doing a comparison we know will fail.
7878 NearLabel loop;
7879 __ Bind(&loop);
7880 // /* HeapReference<Class> */ out = out->super_class_
7881 GenerateReferenceLoadOneRegister(instruction,
7882 out_loc,
7883 super_offset,
7884 maybe_temp_loc,
7885 read_barrier_option);
7886 __ testl(out, out);
7887 // If `out` is null, we use it for the result, and jump to `done`.
7888 __ j(kEqual, &done);
7889 if (cls.IsRegister()) {
7890 __ cmpl(out, cls.AsRegister<Register>());
7891 } else {
7892 DCHECK(cls.IsStackSlot()) << cls;
7893 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7894 }
7895 __ j(kNotEqual, &loop);
7896 __ movl(out, Immediate(1));
7897 if (zero.IsLinked()) {
7898 __ jmp(&done);
7899 }
7900 break;
7901 }
7902
7903 case TypeCheckKind::kClassHierarchyCheck: {
7904 ReadBarrierOption read_barrier_option =
7905 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7906 // /* HeapReference<Class> */ out = obj->klass_
7907 GenerateReferenceLoadTwoRegisters(instruction,
7908 out_loc,
7909 obj_loc,
7910 class_offset,
7911 read_barrier_option);
7912 // Walk over the class hierarchy to find a match.
7913 NearLabel loop, success;
7914 __ Bind(&loop);
7915 if (cls.IsRegister()) {
7916 __ cmpl(out, cls.AsRegister<Register>());
7917 } else {
7918 DCHECK(cls.IsStackSlot()) << cls;
7919 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7920 }
7921 __ j(kEqual, &success);
7922 // /* HeapReference<Class> */ out = out->super_class_
7923 GenerateReferenceLoadOneRegister(instruction,
7924 out_loc,
7925 super_offset,
7926 maybe_temp_loc,
7927 read_barrier_option);
7928 __ testl(out, out);
7929 __ j(kNotEqual, &loop);
7930 // If `out` is null, we use it for the result, and jump to `done`.
7931 __ jmp(&done);
7932 __ Bind(&success);
7933 __ movl(out, Immediate(1));
7934 if (zero.IsLinked()) {
7935 __ jmp(&done);
7936 }
7937 break;
7938 }
7939
7940 case TypeCheckKind::kArrayObjectCheck: {
7941 ReadBarrierOption read_barrier_option =
7942 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7943 // /* HeapReference<Class> */ out = obj->klass_
7944 GenerateReferenceLoadTwoRegisters(instruction,
7945 out_loc,
7946 obj_loc,
7947 class_offset,
7948 read_barrier_option);
7949 // Do an exact check.
7950 NearLabel exact_check;
7951 if (cls.IsRegister()) {
7952 __ cmpl(out, cls.AsRegister<Register>());
7953 } else {
7954 DCHECK(cls.IsStackSlot()) << cls;
7955 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7956 }
7957 __ j(kEqual, &exact_check);
7958 // Otherwise, we need to check that the object's class is a non-primitive array.
7959 // /* HeapReference<Class> */ out = out->component_type_
7960 GenerateReferenceLoadOneRegister(instruction,
7961 out_loc,
7962 component_offset,
7963 maybe_temp_loc,
7964 read_barrier_option);
7965 __ testl(out, out);
7966 // If `out` is null, we use it for the result, and jump to `done`.
7967 __ j(kEqual, &done);
7968 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7969 __ j(kNotEqual, &zero);
7970 __ Bind(&exact_check);
7971 __ movl(out, Immediate(1));
7972 __ jmp(&done);
7973 break;
7974 }
7975
7976 case TypeCheckKind::kArrayCheck: {
7977 // No read barrier since the slow path will retry upon failure.
7978 // /* HeapReference<Class> */ out = obj->klass_
7979 GenerateReferenceLoadTwoRegisters(instruction,
7980 out_loc,
7981 obj_loc,
7982 class_offset,
7983 kWithoutReadBarrier);
7984 if (cls.IsRegister()) {
7985 __ cmpl(out, cls.AsRegister<Register>());
7986 } else {
7987 DCHECK(cls.IsStackSlot()) << cls;
7988 __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7989 }
7990 DCHECK(locations->OnlyCallsOnSlowPath());
7991 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7992 instruction, /* is_fatal= */ false);
7993 codegen_->AddSlowPath(slow_path);
7994 __ j(kNotEqual, slow_path->GetEntryLabel());
7995 __ movl(out, Immediate(1));
7996 if (zero.IsLinked()) {
7997 __ jmp(&done);
7998 }
7999 break;
8000 }
8001
8002 case TypeCheckKind::kInterfaceCheck: {
8003 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
8004 DCHECK(locations->OnlyCallsOnSlowPath());
8005 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8006 instruction, /* is_fatal= */ false);
8007 codegen_->AddSlowPath(slow_path);
8008 if (codegen_->EmitNonBakerReadBarrier()) {
8009 __ jmp(slow_path->GetEntryLabel());
8010 break;
8011 }
8012 // For Baker read barrier, take the slow path while marking.
8013 __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()),
8014 Immediate(0));
8015 __ j(kNotEqual, slow_path->GetEntryLabel());
8016 }
8017
8018 // Fast-path without read barriers.
8019 Register temp = maybe_temp_loc.AsRegister<Register>();
8020 // /* HeapReference<Class> */ temp = obj->klass_
8021 __ movl(temp, Address(obj, class_offset));
8022 __ MaybeUnpoisonHeapReference(temp);
8023 // /* HeapReference<Class> */ temp = temp->iftable_
8024 __ movl(temp, Address(temp, iftable_offset));
8025 __ MaybeUnpoisonHeapReference(temp);
8026 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8027 __ movl(out, Address(temp, array_length_offset));
8028 // Maybe poison the `cls` for direct comparison with memory.
8029 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8030 // Loop through the iftable and check if any class matches.
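// The `IfTable` stores (interface class, method array) pairs, so entries are two references
// apart and the index is decremented by 2 per iteration.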
8031 NearLabel loop, end;
8032 __ Bind(&loop);
8033 // Check if we still have an entry to compare.
8034 __ subl(out, Immediate(2));
8035 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
8036 // Go to next interface if the classes do not match.
8037 __ cmpl(cls.AsRegister<Register>(),
8038 CodeGeneratorX86::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
8039 __ j(kNotEqual, &loop);
8040 if (zero.IsLinked()) {
8041 __ movl(out, Immediate(1));
8042 // If `cls` was poisoned above, unpoison it.
8043 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8044 __ jmp(&done);
8045 if (kPoisonHeapReferences) {
8046 // The false case needs to unpoison the class before jumping to `zero`.
8047 __ Bind(&end);
8048 __ UnpoisonHeapReference(cls.AsRegister<Register>());
8049 __ jmp(&zero);
8050 }
8051 } else {
8052 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
8053 __ movl(out, Immediate(-1));
8054 __ Bind(&end);
8055 __ addl(out, Immediate(2));
8056 // If `cls` was poisoned above, unpoison it.
8057 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8058 }
8059 break;
8060 }
8061
8062 case TypeCheckKind::kUnresolvedCheck: {
8063 // Note that we indeed only call on slow path, but we always go
8064 // into the slow path for the unresolved check case.
8065 //
8066 // We cannot directly call the InstanceofNonTrivial runtime
8067 // entry point without resorting to a type checking slow path
8068 // here (i.e. by calling InvokeRuntime directly), as it would
8069 // require to assign fixed registers for the inputs of this
8070 // HInstanceOf instruction (following the runtime calling
8071 // convention), which might be cluttered by the potential first
8072 // read barrier emission at the beginning of this method.
8073 //
8074 // TODO: Introduce a new runtime entry point taking the object
8075 // to test (instead of its class) as argument, and let it deal
8076 // with the read barrier issues. This will let us refactor this
8077 // case of the `switch` code as it was previously (with a direct
8078 // call to the runtime not using a type checking slow path).
8079 // This should also be beneficial for the other cases above.
8080 DCHECK(locations->OnlyCallsOnSlowPath());
8081 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8082 instruction, /* is_fatal= */ false);
8083 codegen_->AddSlowPath(slow_path);
8084 __ jmp(slow_path->GetEntryLabel());
8085 break;
8086 }
8087
8088 case TypeCheckKind::kBitstringCheck: {
8089 // /* HeapReference<Class> */ temp = obj->klass_
8090 GenerateReferenceLoadTwoRegisters(instruction,
8091 out_loc,
8092 obj_loc,
8093 class_offset,
8094 kWithoutReadBarrier);
8095
8096 GenerateBitstringTypeCheckCompare(instruction, out);
8097 __ j(kNotEqual, &zero);
8098 __ movl(out, Immediate(1));
8099 __ jmp(&done);
8100 break;
8101 }
8102 }
8103
8104 if (zero.IsLinked()) {
8105 __ Bind(&zero);
8106 __ xorl(out, out);
8107 }
8108
8109 if (done.IsLinked()) {
8110 __ Bind(&done);
8111 }
8112
8113 if (slow_path != nullptr) {
8114 __ Bind(slow_path->GetExitLabel());
8115 }
8116 }
8117
8118 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
8119 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8120 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
8121 LocationSummary* locations =
8122 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8123 locations->SetInAt(0, Location::RequiresRegister());
8124 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
8125 // Require a register for the interface check since there is a loop that compares the class to
8126 // a memory address.
8127 locations->SetInAt(1, Location::RequiresRegister());
8128 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8129 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
8130 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
8131 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
8132 } else {
8133 locations->SetInAt(1, Location::Any());
8134 }
8135 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
8136 }
8137
8138 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
8139 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8140 LocationSummary* locations = instruction->GetLocations();
8141 Location obj_loc = locations->InAt(0);
8142 Register obj = obj_loc.AsRegister<Register>();
8143 Location cls = locations->InAt(1);
8144 Location temp_loc = locations->GetTemp(0);
8145 Register temp = temp_loc.AsRegister<Register>();
8146 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
8147 DCHECK_GE(num_temps, 1u);
8148 DCHECK_LE(num_temps, 2u);
8149 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8150 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8151 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8152 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8153 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8154 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8155 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8156 const uint32_t object_array_data_offset =
8157 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8158
8159 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
8160 SlowPathCode* type_check_slow_path =
8161 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
8162 instruction, is_type_check_slow_path_fatal);
8163 codegen_->AddSlowPath(type_check_slow_path);
8164
8165 NearLabel done;
8166 // Avoid null check if we know obj is not null.
8167 if (instruction->MustDoNullCheck()) {
8168 __ testl(obj, obj);
8169 __ j(kEqual, &done);
8170 }
8171
8172 switch (type_check_kind) {
8173 case TypeCheckKind::kExactCheck:
8174 case TypeCheckKind::kArrayCheck: {
8175 // /* HeapReference<Class> */ temp = obj->klass_
8176 GenerateReferenceLoadTwoRegisters(instruction,
8177 temp_loc,
8178 obj_loc,
8179 class_offset,
8180 kWithoutReadBarrier);
8181
8182 if (cls.IsRegister()) {
8183 __ cmpl(temp, cls.AsRegister<Register>());
8184 } else {
8185 DCHECK(cls.IsStackSlot()) << cls;
8186 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8187 }
8188 // Jump to slow path for throwing the exception or doing a
8189 // more involved array check.
8190 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8191 break;
8192 }
8193
8194 case TypeCheckKind::kAbstractClassCheck: {
8195 // /* HeapReference<Class> */ temp = obj->klass_
8196 GenerateReferenceLoadTwoRegisters(instruction,
8197 temp_loc,
8198 obj_loc,
8199 class_offset,
8200 kWithoutReadBarrier);
8201
8202 // If the class is abstract, we eagerly fetch the super class of the
8203 // object to avoid doing a comparison we know will fail.
8204 NearLabel loop;
8205 __ Bind(&loop);
8206 // /* HeapReference<Class> */ temp = temp->super_class_
8207 GenerateReferenceLoadOneRegister(instruction,
8208 temp_loc,
8209 super_offset,
8210 maybe_temp2_loc,
8211 kWithoutReadBarrier);
8212
8213 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8214 // exception.
8215 __ testl(temp, temp);
8216 __ j(kZero, type_check_slow_path->GetEntryLabel());
8217
8218 // Otherwise, compare the classes
8219 if (cls.IsRegister()) {
8220 __ cmpl(temp, cls.AsRegister<Register>());
8221 } else {
8222 DCHECK(cls.IsStackSlot()) << cls;
8223 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8224 }
8225 __ j(kNotEqual, &loop);
8226 break;
8227 }
8228
8229 case TypeCheckKind::kClassHierarchyCheck: {
8230 // /* HeapReference<Class> */ temp = obj->klass_
8231 GenerateReferenceLoadTwoRegisters(instruction,
8232 temp_loc,
8233 obj_loc,
8234 class_offset,
8235 kWithoutReadBarrier);
8236
8237 // Walk over the class hierarchy to find a match.
8238 NearLabel loop;
8239 __ Bind(&loop);
8240 if (cls.IsRegister()) {
8241 __ cmpl(temp, cls.AsRegister<Register>());
8242 } else {
8243 DCHECK(cls.IsStackSlot()) << cls;
8244 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8245 }
8246 __ j(kEqual, &done);
8247
8248 // /* HeapReference<Class> */ temp = temp->super_class_
8249 GenerateReferenceLoadOneRegister(instruction,
8250 temp_loc,
8251 super_offset,
8252 maybe_temp2_loc,
8253 kWithoutReadBarrier);
8254
8255 // If the class reference currently in `temp` is not null, jump
8256 // back to the beginning of the loop.
8257 __ testl(temp, temp);
8258 __ j(kNotZero, &loop);
8259 // Otherwise, jump to the slow path to throw the exception.
8260 __ jmp(type_check_slow_path->GetEntryLabel());
8261 break;
8262 }
8263
8264 case TypeCheckKind::kArrayObjectCheck: {
8265 // /* HeapReference<Class> */ temp = obj->klass_
8266 GenerateReferenceLoadTwoRegisters(instruction,
8267 temp_loc,
8268 obj_loc,
8269 class_offset,
8270 kWithoutReadBarrier);
8271
8272 // Do an exact check.
8273 if (cls.IsRegister()) {
8274 __ cmpl(temp, cls.AsRegister<Register>());
8275 } else {
8276 DCHECK(cls.IsStackSlot()) << cls;
8277 __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
8278 }
8279 __ j(kEqual, &done);
8280
8281 // Otherwise, we need to check that the object's class is a non-primitive array.
8282 // /* HeapReference<Class> */ temp = temp->component_type_
8283 GenerateReferenceLoadOneRegister(instruction,
8284 temp_loc,
8285 component_offset,
8286 maybe_temp2_loc,
8287 kWithoutReadBarrier);
8288
8289 // If the component type is null (i.e. the object is not an array), jump to the slow path to
8290 // throw the exception. Otherwise proceed with the check.
8291 __ testl(temp, temp);
8292 __ j(kZero, type_check_slow_path->GetEntryLabel());
8293
8294 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8295 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8296 break;
8297 }
8298
8299 case TypeCheckKind::kUnresolvedCheck:
8300 // We always go into the type check slow path for the unresolved check case.
8301 // We cannot directly call the CheckCast runtime entry point
8302 // without resorting to a type checking slow path here (i.e. by
8303 // calling InvokeRuntime directly), as it would require
8304 // assigning fixed registers for the inputs of this HCheckCast
8305 // instruction (following the runtime calling convention), which
8306 // might be cluttered by the potential first read barrier
8307 // emission at the beginning of this method.
8308 __ jmp(type_check_slow_path->GetEntryLabel());
8309 break;
8310
8311 case TypeCheckKind::kInterfaceCheck: {
8312 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
8313 // We cannot get false positives by doing this.
8314 // /* HeapReference<Class> */ temp = obj->klass_
8315 GenerateReferenceLoadTwoRegisters(instruction,
8316 temp_loc,
8317 obj_loc,
8318 class_offset,
8319 kWithoutReadBarrier);
8320
8321 // /* HeapReference<Class> */ temp = temp->iftable_
8322 GenerateReferenceLoadOneRegister(instruction,
8323 temp_loc,
8324 iftable_offset,
8325 maybe_temp2_loc,
8326 kWithoutReadBarrier);
8327 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
8328 __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
8329 // Maybe poison the `cls` for direct comparison with memory.
8330 __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8331 // Loop through the iftable and check if any class matches.
8332 NearLabel start_loop;
8333 __ Bind(&start_loop);
8334 // Check if we still have an entry to compare.
8335 __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8336 __ j(kNegative, type_check_slow_path->GetEntryLabel());
8337 // Go to next interface if the classes do not match.
8338 __ cmpl(cls.AsRegister<Register>(),
8339 CodeGeneratorX86::ArrayAddress(temp,
8340 maybe_temp2_loc,
8341 TIMES_4,
8342 object_array_data_offset));
8343 __ j(kNotEqual, &start_loop);
8344 // If `cls` was poisoned above, unpoison it.
8345 __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8346 break;
8347 }
8348
8349 case TypeCheckKind::kBitstringCheck: {
8350 // /* HeapReference<Class> */ temp = obj->klass_
8351 GenerateReferenceLoadTwoRegisters(instruction,
8352 temp_loc,
8353 obj_loc,
8354 class_offset,
8355 kWithoutReadBarrier);
8356
8357 GenerateBitstringTypeCheckCompare(instruction, temp);
8358 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8359 break;
8360 }
8361 }
8362 __ Bind(&done);
8363
8364 __ Bind(type_check_slow_path->GetExitLabel());
8365 }
8366
8367 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8368 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8369 instruction, LocationSummary::kCallOnMainOnly);
8370 InvokeRuntimeCallingConvention calling_convention;
8371 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8372 }
8373
8374 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8375 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8376 instruction);
8377 if (instruction->IsEnter()) {
8378 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8379 } else {
8380 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8381 }
8382 }
8383
8384 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8385 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8386 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8387 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8388 locations->SetInAt(0, Location::RequiresRegister());
8389 locations->SetInAt(1, Location::RequiresRegister());
8390 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8391 }
8392
8393 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8394 LocationSummary* locations = instruction->GetLocations();
8395 Location first = locations->InAt(0);
8396 Location second = locations->InAt(1);
8397 Location dest = locations->Out();
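// Note: BMI1 `andn` folds the negation and the AND into a single instruction; for
// 64-bit inputs it is simply applied to the low and high halves of the register pairs.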
8398 if (instruction->GetResultType() == DataType::Type::kInt32) {
8399 __ andn(dest.AsRegister<Register>(),
8400 first.AsRegister<Register>(),
8401 second.AsRegister<Register>());
8402 } else {
8403 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8404 __ andn(dest.AsRegisterPairLow<Register>(),
8405 first.AsRegisterPairLow<Register>(),
8406 second.AsRegisterPairLow<Register>());
8407 __ andn(dest.AsRegisterPairHigh<Register>(),
8408 first.AsRegisterPairHigh<Register>(),
8409 second.AsRegisterPairHigh<Register>());
8410 }
8411 }
8412
8413 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8414 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8415 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8416 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8417 locations->SetInAt(0, Location::RequiresRegister());
8418 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8419 }
8420
8421 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8422 HX86MaskOrResetLeastSetBit* instruction) {
8423 LocationSummary* locations = instruction->GetLocations();
8424 Location src = locations->InAt(0);
8425 Location dest = locations->Out();
8426 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
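// As a reminder: `blsr` computes x & (x - 1), i.e. it resets the lowest set bit, while
// `blsmsk` computes x ^ (x - 1), a mask up to and including the lowest set bit.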
8427 switch (instruction->GetOpKind()) {
8428 case HInstruction::kAnd:
8429 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8430 break;
8431 case HInstruction::kXor:
8432 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8433 break;
8434 default:
8435 LOG(FATAL) << "Unreachable";
8436 }
8437 }
8438
8439 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8440 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8441 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8442
8443 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8444 LocationSummary* locations =
8445 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8446 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8447 || instruction->GetResultType() == DataType::Type::kInt64);
8448 locations->SetInAt(0, Location::RequiresRegister());
8449 locations->SetInAt(1, Location::Any());
8450 locations->SetOut(Location::SameAsFirstInput());
8451 }
8452
8453 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8454 HandleBitwiseOperation(instruction);
8455 }
8456
8457 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8458 HandleBitwiseOperation(instruction);
8459 }
8460
8461 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8462 HandleBitwiseOperation(instruction);
8463 }
8464
8465 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8466 LocationSummary* locations = instruction->GetLocations();
8467 Location first = locations->InAt(0);
8468 Location second = locations->InAt(1);
8469 DCHECK(first.Equals(locations->Out()));
8470
8471 if (instruction->GetResultType() == DataType::Type::kInt32) {
8472 if (second.IsRegister()) {
8473 if (instruction->IsAnd()) {
8474 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8475 } else if (instruction->IsOr()) {
8476 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8477 } else {
8478 DCHECK(instruction->IsXor());
8479 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8480 }
8481 } else if (second.IsConstant()) {
8482 if (instruction->IsAnd()) {
8483 __ andl(first.AsRegister<Register>(),
8484 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8485 } else if (instruction->IsOr()) {
8486 __ orl(first.AsRegister<Register>(),
8487 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8488 } else {
8489 DCHECK(instruction->IsXor());
8490 __ xorl(first.AsRegister<Register>(),
8491 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8492 }
8493 } else {
8494 if (instruction->IsAnd()) {
8495 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8496 } else if (instruction->IsOr()) {
8497 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8498 } else {
8499 DCHECK(instruction->IsXor());
8500 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8501 }
8502 }
8503 } else {
8504 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8505 if (second.IsRegisterPair()) {
8506 if (instruction->IsAnd()) {
8507 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8508 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8509 } else if (instruction->IsOr()) {
8510 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8511 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8512 } else {
8513 DCHECK(instruction->IsXor());
8514 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8515 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8516 }
8517 } else if (second.IsDoubleStackSlot()) {
8518 if (instruction->IsAnd()) {
8519 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8520 __ andl(first.AsRegisterPairHigh<Register>(),
8521 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8522 } else if (instruction->IsOr()) {
8523 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8524 __ orl(first.AsRegisterPairHigh<Register>(),
8525 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8526 } else {
8527 DCHECK(instruction->IsXor());
8528 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8529 __ xorl(first.AsRegisterPairHigh<Register>(),
8530 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8531 }
8532 } else {
8533 DCHECK(second.IsConstant()) << second;
8534 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
8535 int32_t low_value = Low32Bits(value);
8536 int32_t high_value = High32Bits(value);
8537 Immediate low(low_value);
8538 Immediate high(high_value);
8539 Register first_low = first.AsRegisterPairLow<Register>();
8540 Register first_high = first.AsRegisterPairHigh<Register>();
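// For constant operands, fold the trivial halves: AND with 0 becomes a clear (xorl),
// AND with -1 is a no-op, and OR/XOR with 0 are no-ops.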
8541 if (instruction->IsAnd()) {
8542 if (low_value == 0) {
8543 __ xorl(first_low, first_low);
8544 } else if (low_value != -1) {
8545 __ andl(first_low, low);
8546 }
8547 if (high_value == 0) {
8548 __ xorl(first_high, first_high);
8549 } else if (high_value != -1) {
8550 __ andl(first_high, high);
8551 }
8552 } else if (instruction->IsOr()) {
8553 if (low_value != 0) {
8554 __ orl(first_low, low);
8555 }
8556 if (high_value != 0) {
8557 __ orl(first_high, high);
8558 }
8559 } else {
8560 DCHECK(instruction->IsXor());
8561 if (low_value != 0) {
8562 __ xorl(first_low, low);
8563 }
8564 if (high_value != 0) {
8565 __ xorl(first_high, high);
8566 }
8567 }
8568 }
8569 }
8570 }
8571
8572 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8573 HInstruction* instruction,
8574 Location out,
8575 uint32_t offset,
8576 Location maybe_temp,
8577 ReadBarrierOption read_barrier_option) {
8578 Register out_reg = out.AsRegister<Register>();
8579 if (read_barrier_option == kWithReadBarrier) {
8580 DCHECK(codegen_->EmitReadBarrier());
8581 if (kUseBakerReadBarrier) {
8582 // Load with fast path based Baker's read barrier.
8583 // /* HeapReference<Object> */ out = *(out + offset)
8584 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8585 instruction, out, out_reg, offset, /* needs_null_check= */ false);
8586 } else {
8587 // Load with slow path based read barrier.
8588 // Save the value of `out` into `maybe_temp` before overwriting it
8589 // in the following move operation, as we will need it for the
8590 // read barrier below.
8591 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8592 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8593 // /* HeapReference<Object> */ out = *(out + offset)
8594 __ movl(out_reg, Address(out_reg, offset));
8595 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8596 }
8597 } else {
8598 // Plain load with no read barrier.
8599 // /* HeapReference<Object> */ out = *(out + offset)
8600 __ movl(out_reg, Address(out_reg, offset));
8601 __ MaybeUnpoisonHeapReference(out_reg);
8602 }
8603 }
8604
8605 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8606 HInstruction* instruction,
8607 Location out,
8608 Location obj,
8609 uint32_t offset,
8610 ReadBarrierOption read_barrier_option) {
8611 Register out_reg = out.AsRegister<Register>();
8612 Register obj_reg = obj.AsRegister<Register>();
8613 if (read_barrier_option == kWithReadBarrier) {
8614 DCHECK(codegen_->EmitReadBarrier());
8615 if (kUseBakerReadBarrier) {
8616 // Load with fast path based Baker's read barrier.
8617 // /* HeapReference<Object> */ out = *(obj + offset)
8618 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8619 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8620 } else {
8621 // Load with slow path based read barrier.
8622 // /* HeapReference<Object> */ out = *(obj + offset)
8623 __ movl(out_reg, Address(obj_reg, offset));
8624 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8625 }
8626 } else {
8627 // Plain load with no read barrier.
8628 // /* HeapReference<Object> */ out = *(obj + offset)
8629 __ movl(out_reg, Address(obj_reg, offset));
8630 __ MaybeUnpoisonHeapReference(out_reg);
8631 }
8632 }
8633
8634 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8635 HInstruction* instruction,
8636 Location root,
8637 const Address& address,
8638 Label* fixup_label,
8639 ReadBarrierOption read_barrier_option) {
8640 Register root_reg = root.AsRegister<Register>();
8641 if (read_barrier_option == kWithReadBarrier) {
8642 DCHECK(codegen_->EmitReadBarrier());
8643 if (kUseBakerReadBarrier) {
8644 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8645 // Baker's read barriers are used:
8646 //
8647 // root = obj.field;
8648 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8649 // if (temp != null) {
8650 // root = temp(root)
8651 // }
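//
// Checking the per-register pReadBarrierMarkRegXX entrypoint for null doubles as the
// "is the GC marking" test, since those entrypoints are only installed while the GC
// is marking.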
8652
8653 // /* GcRoot<mirror::Object> */ root = *address
8654 __ movl(root_reg, address);
8655 if (fixup_label != nullptr) {
8656 __ Bind(fixup_label);
8657 }
8658 static_assert(
8659 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8660 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8661 "have different sizes.");
8662 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8663 "art::mirror::CompressedReference<mirror::Object> and int32_t "
8664 "have different sizes.");
8665
8666 // Slow path marking the GC root `root`.
8667 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8668 instruction, root, /* unpoison_ref_before_marking= */ false);
8669 codegen_->AddSlowPath(slow_path);
8670
8671 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8672 const int32_t entry_point_offset =
8673 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8674 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8675 // The entrypoint is null when the GC is not marking.
8676 __ j(kNotEqual, slow_path->GetEntryLabel());
8677 __ Bind(slow_path->GetExitLabel());
8678 } else {
8679 // GC root loaded through a slow path for read barriers other
8680 // than Baker's.
8681 // /* GcRoot<mirror::Object>* */ root = address
8682 __ leal(root_reg, address);
8683 if (fixup_label != nullptr) {
8684 __ Bind(fixup_label);
8685 }
8686 // /* mirror::Object* */ root = root->Read()
8687 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8688 }
8689 } else {
8690 // Plain GC root load with no read barrier.
8691 // /* GcRoot<mirror::Object> */ root = *address
8692 __ movl(root_reg, address);
8693 if (fixup_label != nullptr) {
8694 __ Bind(fixup_label);
8695 }
8696 // Note that GC roots are not affected by heap poisoning, thus we
8697 // do not have to unpoison `root_reg` here.
8698 }
8699 }
8700
8701 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8702 Location ref,
8703 Register obj,
8704 uint32_t offset,
8705 bool needs_null_check) {
8706 DCHECK(EmitBakerReadBarrier());
8707
8708 // /* HeapReference<Object> */ ref = *(obj + offset)
8709 Address src(obj, offset);
8710 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8711 }
8712
8713 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8714 Location ref,
8715 Register obj,
8716 uint32_t data_offset,
8717 Location index,
8718 bool needs_null_check) {
8719 DCHECK(EmitBakerReadBarrier());
8720
8721 static_assert(
8722 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8723 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8724 // /* HeapReference<Object> */ ref =
8725 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8726 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8727 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8728 }
8729
8730 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8731 Location ref,
8732 Register obj,
8733 const Address& src,
8734 bool needs_null_check,
8735 bool always_update_field,
8736 Register* temp) {
8737 DCHECK(EmitBakerReadBarrier());
8738
8739 // In slow path based read barriers, the read barrier call is
8740 // inserted after the original load. However, in fast path based
8741 // Baker's read barriers, we need to perform the load of
8742 // mirror::Object::monitor_ *before* the original reference load.
8743 // This load-load ordering is required by the read barrier.
8744 // The fast path/slow path (for Baker's algorithm) should look like:
8745 //
8746 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8747 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8748 // HeapReference<Object> ref = *src; // Original reference load.
8749 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8750 // if (is_gray) {
8751 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8752 // }
8753 //
8754 // Note: the original implementation in ReadBarrier::Barrier is
8755 // slightly more complex as:
8756 // - it implements the load-load fence using a data dependency on
8757 // the high-bits of rb_state, which are expected to be all zeroes
8758 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8759 // which is a no-op thanks to the x86 memory model);
8760 // - it performs additional checks that we do not do here for
8761 // performance reasons.
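//
// As a rough sketch of what is emitted below (not exact encodings):
//   testb gray_byte(obj->monitor_), gray_bit_mask   // check rb_state
//   ref = *src                                      // original reference load
//   jnz  -> ReadBarrierMarkSlowPath                 // gray: mark `ref` in the slow path
//   maybe-unpoison ref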
8762
8763 Register ref_reg = ref.AsRegister<Register>();
8764 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8765
8766 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8767 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8768 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8769 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8770 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8771 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
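// In other words, `gray_byte_position` selects the byte of the 32-bit lock word that
// holds the read barrier state bit, and `test_value` is a one-byte mask for that bit,
// so a single `testb` below is sufficient.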
8772
8773 // if (rb_state == ReadBarrier::GrayState())
8774 // ref = ReadBarrier::Mark(ref);
8775 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8776 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8777 if (needs_null_check) {
8778 MaybeRecordImplicitNullCheck(instruction);
8779 }
8780
8781 // Load fence to prevent load-load reordering.
8782 // Note that this is a no-op, thanks to the x86 memory model.
8783 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8784
8785 // The actual reference load.
8786 // /* HeapReference<Object> */ ref = *src
8787 __ movl(ref_reg, src); // Flags are unaffected.
8788
8789 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8790 // Slow path marking the object `ref` when it is gray.
8791 SlowPathCode* slow_path;
8792 if (always_update_field) {
8793 DCHECK(temp != nullptr);
8794 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8795 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8796 } else {
8797 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8798 instruction, ref, /* unpoison_ref_before_marking= */ true);
8799 }
8800 AddSlowPath(slow_path);
8801
8802 // We have done the "if" of the gray bit check above, now branch based on the flags.
8803 __ j(kNotZero, slow_path->GetEntryLabel());
8804
8805 // Object* ref = ref_addr->AsMirrorPtr()
8806 __ MaybeUnpoisonHeapReference(ref_reg);
8807
8808 __ Bind(slow_path->GetExitLabel());
8809 }
8810
8811 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8812 Location out,
8813 Location ref,
8814 Location obj,
8815 uint32_t offset,
8816 Location index) {
8817 DCHECK(EmitReadBarrier());
8818
8819 // Insert a slow path based read barrier *after* the reference load.
8820 //
8821 // If heap poisoning is enabled, the unpoisoning of the loaded
8822 // reference will be carried out by the runtime within the slow
8823 // path.
8824 //
8825 // Note that `ref` currently does not get unpoisoned (when heap
8826 // poisoning is enabled), which is alright as the `ref` argument is
8827 // not used by the artReadBarrierSlow entry point.
8828 //
8829 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8830 SlowPathCode* slow_path = new (GetScopedAllocator())
8831 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8832 AddSlowPath(slow_path);
8833
8834 __ jmp(slow_path->GetEntryLabel());
8835 __ Bind(slow_path->GetExitLabel());
8836 }
8837
8838 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8839 Location out,
8840 Location ref,
8841 Location obj,
8842 uint32_t offset,
8843 Location index) {
8844 if (EmitReadBarrier()) {
8845 // Baker's read barriers shall be handled by the fast path
8846 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8847 DCHECK(!kUseBakerReadBarrier);
8848 // If heap poisoning is enabled, unpoisoning will be taken care of
8849 // by the runtime within the slow path.
8850 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8851 } else if (kPoisonHeapReferences) {
8852 __ UnpoisonHeapReference(out.AsRegister<Register>());
8853 }
8854 }
8855
8856 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8857 Location out,
8858 Location root) {
8859 DCHECK(EmitReadBarrier());
8860
8861 // Insert a slow path based read barrier *after* the GC root load.
8862 //
8863 // Note that GC roots are not affected by heap poisoning, so we do
8864 // not need to do anything special for this here.
8865 SlowPathCode* slow_path =
8866 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8867 AddSlowPath(slow_path);
8868
8869 __ jmp(slow_path->GetEntryLabel());
8870 __ Bind(slow_path->GetExitLabel());
8871 }
8872
8873 void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8874 // Nothing to do, this should be removed during prepare for register allocator.
8875 LOG(FATAL) << "Unreachable";
8876 }
8877
8878 void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8879 // Nothing to do, this should be removed during prepare for register allocator.
8880 LOG(FATAL) << "Unreachable";
8881 }
8882
8883 // Simple implementation of packed switch - generate cascaded compare/jumps.
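// For illustration only (assumed lower_bound == 10 with three entries), the cascade
// generated by GenPackedSwitchWithCompares below looks roughly like (AT&T-style):
//   cmpl $10, value ; jl default ; je case_10
//   cmpl $12, value ; jl case_11 ; je case_12
//   jmp default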
8884 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8885 LocationSummary* locations =
8886 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8887 locations->SetInAt(0, Location::RequiresRegister());
8888 }
8889
8890 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8891 int32_t lower_bound,
8892 uint32_t num_entries,
8893 HBasicBlock* switch_block,
8894 HBasicBlock* default_block) {
8895 // Figure out the correct compare values and jump conditions.
8896 // Handle the first compare/branch as a special case because it might
8897 // jump to the default case.
8898 DCHECK_GT(num_entries, 2u);
8899 Condition first_condition;
8900 uint32_t index;
8901 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8902 if (lower_bound != 0) {
8903 first_condition = kLess;
8904 __ cmpl(value_reg, Immediate(lower_bound));
8905 __ j(first_condition, codegen_->GetLabelOf(default_block));
8906 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8907
8908 index = 1;
8909 } else {
8910 // Handle all the compare/jumps below.
8911 first_condition = kBelow;
8912 index = 0;
8913 }
8914
8915 // Handle the rest of the compare/jumps.
8916 for (; index + 1 < num_entries; index += 2) {
8917 int32_t compare_to_value = lower_bound + index + 1;
8918 __ cmpl(value_reg, Immediate(compare_to_value));
8919 // Jump to successors[index] if value < case_value[index + 1] (i.e. value == case_value[index]).
8920 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8921 // Jump to successors[index + 1] if value == case_value[index + 1].
8922 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8923 }
8924
8925 if (index != num_entries) {
8926 // There is an odd number of entries. Handle the last one.
8927 DCHECK_EQ(index + 1, num_entries);
8928 __ cmpl(value_reg, Immediate(lower_bound + index));
8929 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8930 }
8931
8932 // And the default for any other value.
8933 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8934 __ jmp(codegen_->GetLabelOf(default_block));
8935 }
8936 }
8937
8938 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8939 int32_t lower_bound = switch_instr->GetStartValue();
8940 uint32_t num_entries = switch_instr->GetNumEntries();
8941 LocationSummary* locations = switch_instr->GetLocations();
8942 Register value_reg = locations->InAt(0).AsRegister<Register>();
8943
8944 GenPackedSwitchWithCompares(value_reg,
8945 lower_bound,
8946 num_entries,
8947 switch_instr->GetBlock(),
8948 switch_instr->GetDefaultBlock());
8949 }
8950
8951 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8952 LocationSummary* locations =
8953 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8954 locations->SetInAt(0, Location::RequiresRegister());
8955
8956 // Constant area pointer.
8957 locations->SetInAt(1, Location::RequiresRegister());
8958
8959 // And the temporary we need.
8960 locations->AddTemp(Location::RequiresRegister());
8961 }
8962
8963 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8964 int32_t lower_bound = switch_instr->GetStartValue();
8965 uint32_t num_entries = switch_instr->GetNumEntries();
8966 LocationSummary* locations = switch_instr->GetLocations();
8967 Register value_reg = locations->InAt(0).AsRegister<Register>();
8968 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8969
8970 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8971 GenPackedSwitchWithCompares(value_reg,
8972 lower_bound,
8973 num_entries,
8974 switch_instr->GetBlock(),
8975 default_block);
8976 return;
8977 }
8978
8979 // Past the threshold, use a jump table kept in the constant area.
8980 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8981 Register constant_area = locations->InAt(1).AsRegister<Register>();
8982
8983 // Remove the bias, if needed.
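// leal computes value_reg - lower_bound into the temp without modifying value_reg
// (and, unlike subl, without touching the flags).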
8984 if (lower_bound != 0) {
8985 __ leal(temp_reg, Address(value_reg, -lower_bound));
8986 value_reg = temp_reg;
8987 }
8988
8989 // Is the value in range?
8990 DCHECK_GE(num_entries, 1u);
8991 __ cmpl(value_reg, Immediate(num_entries - 1));
8992 __ j(kAbove, codegen_->GetLabelOf(default_block));
8993
8994 // We are in the range of the table.
8995 // Load (target-constant_area) from the jump table, indexing by the value.
8996 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8997
8998 // Compute the actual target address by adding in constant_area.
8999 __ addl(temp_reg, constant_area);
9000
9001 // And jump.
9002 __ jmp(temp_reg);
9003 }
9004
9005 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
9006 HX86ComputeBaseMethodAddress* insn) {
9007 LocationSummary* locations =
9008 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9009 locations->SetOut(Location::RequiresRegister());
9010 }
9011
9012 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
9013 HX86ComputeBaseMethodAddress* insn) {
9014 LocationSummary* locations = insn->GetLocations();
9015 Register reg = locations->Out().AsRegister<Register>();
9016
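// 32-bit x86 has no PC-relative addressing for data, so the method address is obtained
// with the classic call/pop idiom: call the next instruction and pop the pushed return
// address into a register.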
9017 // Generate call to next instruction.
9018 Label next_instruction;
9019 __ call(&next_instruction);
9020 __ Bind(&next_instruction);
9021
9022 // Remember this offset for later use with constant area.
9023 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
9024
9025 // Grab the return address off the stack.
9026 __ popl(reg);
9027 }
9028
9029 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
9030 HX86LoadFromConstantTable* insn) {
9031 LocationSummary* locations =
9032 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
9033
9034 locations->SetInAt(0, Location::RequiresRegister());
9035 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
9036
9037 // If the value doesn't need to be materialized, we only need the inputs to be set.
9038 if (insn->IsEmittedAtUseSite()) {
9039 return;
9040 }
9041
9042 switch (insn->GetType()) {
9043 case DataType::Type::kFloat32:
9044 case DataType::Type::kFloat64:
9045 locations->SetOut(Location::RequiresFpuRegister());
9046 break;
9047
9048 case DataType::Type::kInt32:
9049 locations->SetOut(Location::RequiresRegister());
9050 break;
9051
9052 default:
9053 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9054 }
9055 }
9056
9057 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
9058 if (insn->IsEmittedAtUseSite()) {
9059 return;
9060 }
9061
9062 LocationSummary* locations = insn->GetLocations();
9063 Location out = locations->Out();
9064 Register const_area = locations->InAt(0).AsRegister<Register>();
9065 HConstant *value = insn->GetConstant();
9066
9067 switch (insn->GetType()) {
9068 case DataType::Type::kFloat32:
9069 __ movss(out.AsFpuRegister<XmmRegister>(),
9070 codegen_->LiteralFloatAddress(
9071 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9072 break;
9073
9074 case DataType::Type::kFloat64:
9075 __ movsd(out.AsFpuRegister<XmmRegister>(),
9076 codegen_->LiteralDoubleAddress(
9077 value->AsDoubleConstant()->GetValue(),
9078 insn->GetBaseMethodAddress(),
9079 const_area));
9080 break;
9081
9082 case DataType::Type::kInt32:
9083 __ movl(out.AsRegister<Register>(),
9084 codegen_->LiteralInt32Address(
9085 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
9086 break;
9087
9088 default:
9089 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
9090 }
9091 }
9092
9093 /**
9094 * Class to handle late fixup of offsets into constant area.
9095 */
9096 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
9097 public:
9098 RIPFixup(CodeGeneratorX86& codegen,
9099 HX86ComputeBaseMethodAddress* base_method_address,
9100 size_t offset)
9101 : codegen_(&codegen),
9102 base_method_address_(base_method_address),
9103 offset_into_constant_area_(offset) {}
9104
9105 protected:
9106 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
9107
9108 CodeGeneratorX86* codegen_;
9109 HX86ComputeBaseMethodAddress* base_method_address_;
9110
9111 private:
9112 void Process(const MemoryRegion& region, int pos) override {
9113 // Patch the correct offset for the instruction. The place to patch is the
9114 // last 4 bytes of the instruction.
9115 // The value to patch is the distance from the address computed by the
9116 // HX86ComputeBaseMethodAddress instruction to the target offset in the constant area.
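// Worked example with made-up numbers: if the constant area starts at code offset 0x400,
// this literal sits 0x10 bytes into it, and the base-address instruction recorded code
// offset 0x20, then the patched displacement is 0x400 + 0x10 - 0x20 = 0x3f0, so
// `base_reg + 0x3f0` addresses the literal at run time.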
9117 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
9118 int32_t relative_position =
9119 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
9120
9121 // Patch in the right value.
9122 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
9123 }
9124
9125 // Location in constant area that the fixup refers to.
9126 int32_t offset_into_constant_area_;
9127 };
9128
9129 /**
9130 * Class to handle late fixup of offsets to a jump table that will be created in the
9131 * constant area.
9132 */
9133 class JumpTableRIPFixup : public RIPFixup {
9134 public:
9135 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
9136 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
9137 switch_instr_(switch_instr) {}
9138
9139 void CreateJumpTable() {
9140 X86Assembler* assembler = codegen_->GetAssembler();
9141
9142 // Ensure that the reference to the jump table has the correct offset.
9143 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
9144 SetOffset(offset_in_constant_table);
9145
9146 // The label values in the jump table are computed relative to the
9147 // instruction addressing the constant area.
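// At run time the dispatch (see VisitX86PackedSwitch) computes `base + table[index]`
// and jumps there, so each entry must store `target_position - relative_offset`.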
9148 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
9149
9150 // Populate the jump table with the correct target offsets.
9151 int32_t num_entries = switch_instr_->GetNumEntries();
9152 HBasicBlock* block = switch_instr_->GetBlock();
9153 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
9154 // The value that we want is the target offset - the position of the table.
9155 for (int32_t i = 0; i < num_entries; i++) {
9156 HBasicBlock* b = successors[i];
9157 Label* l = codegen_->GetLabelOf(b);
9158 DCHECK(l->IsBound());
9159 int32_t offset_to_block = l->Position() - relative_offset;
9160 assembler->AppendInt32(offset_to_block);
9161 }
9162 }
9163
9164 private:
9165 const HX86PackedSwitch* switch_instr_;
9166 };
9167
9168 void CodeGeneratorX86::Finalize() {
9169 // Generate the constant area if needed.
9170 X86Assembler* assembler = GetAssembler();
9171
9172 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
9173 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
9174 // byte values.
9175 assembler->Align(4, 0);
9176 constant_area_start_ = assembler->CodeSize();
9177
9178 // Populate any jump tables.
9179 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
9180 jump_table->CreateJumpTable();
9181 }
9182
9183 // And now add the constant area to the generated code.
9184 assembler->AddConstantArea();
9185 }
9186
9187 // And finish up.
9188 CodeGenerator::Finalize();
9189 }
9190
9191 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
9192 HX86ComputeBaseMethodAddress* method_base,
9193 Register reg) {
9194 AssemblerFixup* fixup =
9195 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
9196 return Address(reg, kPlaceholder32BitOffset, fixup);
9197 }
9198
9199 Address CodeGeneratorX86::LiteralFloatAddress(float v,
9200 HX86ComputeBaseMethodAddress* method_base,
9201 Register reg) {
9202 AssemblerFixup* fixup =
9203 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
9204 return Address(reg, kPlaceholder32BitOffset, fixup);
9205 }
9206
9207 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
9208 HX86ComputeBaseMethodAddress* method_base,
9209 Register reg) {
9210 AssemblerFixup* fixup =
9211 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
9212 return Address(reg, kPlaceholder32BitOffset, fixup);
9213 }
9214
9215 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
9216 HX86ComputeBaseMethodAddress* method_base,
9217 Register reg) {
9218 AssemblerFixup* fixup =
9219 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
9220 return Address(reg, kPlaceholder32BitOffset, fixup);
9221 }
9222
9223 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
9224 if (value == 0) {
9225 __ xorl(dest, dest);
9226 } else {
9227 __ movl(dest, Immediate(value));
9228 }
9229 }
9230
9231 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
9232 if (value == 0) {
9233 __ testl(dest, dest);
9234 } else {
9235 __ cmpl(dest, Immediate(value));
9236 }
9237 }
9238
9239 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
9240 Register lhs_reg = lhs.AsRegister<Register>();
9241 GenerateIntCompare(lhs_reg, rhs);
9242 }
9243
9244 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
9245 if (rhs.IsConstant()) {
9246 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
9247 Compare32BitValue(lhs, value);
9248 } else if (rhs.IsStackSlot()) {
9249 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
9250 } else {
9251 __ cmpl(lhs, rhs.AsRegister<Register>());
9252 }
9253 }
9254
9255 Address CodeGeneratorX86::ArrayAddress(Register obj,
9256 Location index,
9257 ScaleFactor scale,
9258 uint32_t data_offset) {
9259 return index.IsConstant()
9260 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
9261 : Address(obj, index.AsRegister<Register>(), scale, data_offset);
9262 }
9263
9264 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
9265 Register reg,
9266 Register value) {
9267 // Create a fixup to be used to create and address the jump table.
9268 JumpTableRIPFixup* table_fixup =
9269 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
9270
9271 // We have to populate the jump tables.
9272 fixups_to_jump_tables_.push_back(table_fixup);
9273
9274 // We want a scaled address, as we are extracting the correct offset from the table.
9275 return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
9276 }
9277
9278 // TODO: target as memory.
9279 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
9280 if (!target.IsValid()) {
9281 DCHECK_EQ(type, DataType::Type::kVoid);
9282 return;
9283 }
9284
9285 DCHECK_NE(type, DataType::Type::kVoid);
9286
9287 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9288 if (target.Equals(return_loc)) {
9289 return;
9290 }
9291
9292 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9293 // with the else branch.
9294 if (type == DataType::Type::kInt64) {
9295 HParallelMove parallel_move(GetGraph()->GetAllocator());
9296 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9297 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9298 GetMoveResolver()->EmitNativeCode(&parallel_move);
9299 } else {
9300 // Let the parallel move resolver take care of all of this.
9301 HParallelMove parallel_move(GetGraph()->GetAllocator());
9302 parallel_move.AddMove(return_loc, target, type, nullptr);
9303 GetMoveResolver()->EmitNativeCode(&parallel_move);
9304 }
9305 }
9306
9307 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9308 const uint8_t* roots_data,
9309 const PatchInfo<Label>& info,
9310 uint64_t index_in_table) const {
9311 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9312 uintptr_t address =
9313 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9314 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9315 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9316 dchecked_integral_cast<uint32_t>(address);
9317 }
9318
9319 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9320 for (const PatchInfo<Label>& info : jit_string_patches_) {
9321 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9322 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9323 PatchJitRootUse(code, roots_data, info, index_in_table);
9324 }
9325
9326 for (const PatchInfo<Label>& info : jit_class_patches_) {
9327 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9328 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9329 PatchJitRootUse(code, roots_data, info, index_in_table);
9330 }
9331 }
9332
9333 void LocationsBuilderX86::VisitIntermediateAddress(
9334 [[maybe_unused]] HIntermediateAddress* instruction) {
9335 LOG(FATAL) << "Unreachable";
9336 }
9337
9338 void InstructionCodeGeneratorX86::VisitIntermediateAddress(
9339 [[maybe_unused]] HIntermediateAddress* instruction) {
9340 LOG(FATAL) << "Unreachable";
9341 }
9342
9343 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9344 return codegen_->GetInstructionSetFeatures().HasAVX();
9345 }
9346 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9347 return codegen_->GetInstructionSetFeatures().HasAVX2();
9348 }
9349 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9350 return codegen_->GetInstructionSetFeatures().HasAVX();
9351 }
9352 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9353 return codegen_->GetInstructionSetFeatures().HasAVX2();
9354 }
9355
9356 void LocationsBuilderX86::VisitBitwiseNegatedRight(
9357 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9358 LOG(FATAL) << "Unimplemented";
9359 }
9360
9361 void InstructionCodeGeneratorX86::VisitBitwiseNegatedRight(
9362 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
9363 LOG(FATAL) << "Unimplemented";
9364 }
9365
9366 #undef __
9367
9368 } // namespace x86
9369 } // namespace art
9370