/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art HIDDEN {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(gUseReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
}

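// Note: COMISS/COMISD set ZF on equality and PF when either operand is NaN (unordered), so
// the kNotEqual/kParityEven branches below skip the `1` store for non-infinite and NaN
// inputs, leaving the zeroed output in place.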
static void GenIsInfinite(LocationSummary* locations,
                          bool is64bit,
                          CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = codegen->GetAssembler();

  XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister output = locations->Out().AsRegister<CpuRegister>();

  NearLabel done1, done2;

  if (is64bit) {
    double kPositiveInfinity = std::numeric_limits<double>::infinity();
    double kNegativeInfinity = -1 * kPositiveInfinity;

    __ xorq(output, output);
    __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
    __ j(kNotEqual, &done1);
    __ j(kParityEven, &done2);
    __ movq(output, Immediate(1));
    __ jmp(&done2);
    __ Bind(&done1);
    __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
    __ j(kNotEqual, &done2);
    __ j(kParityEven, &done2);
    __ movq(output, Immediate(1));
    __ Bind(&done2);
  } else {
    float kPositiveInfinity = std::numeric_limits<float>::infinity();
    float kNegativeInfinity = -1 * kPositiveInfinity;

    __ xorl(output, output);
    __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
    __ j(kNotEqual, &done1);
    __ j(kParityEven, &done2);
    __ movl(output, Immediate(1));
    __ jmp(&done2);
    __ Bind(&done1);
    __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
    __ j(kNotEqual, &done2);
    __ j(kParityEven, &done2);
    __ movl(output, Immediate(1));
    __ Bind(&done2);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, codegen_);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

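// SSE4.1 ROUNDSS/ROUNDSD rounding-mode immediates used by the callers below:
// 0 = round to nearest (even), 1 = round toward negative infinity (floor),
// 2 = round toward positive infinity (ceil). With bit 2 clear, the immediate
// overrides the MXCSR rounding mode.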
static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit= */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit= */ true);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

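// The FP arguments are already in the runtime calling-convention registers and the result
// comes back in XMM0 (see the *FPCallLocations helpers), so these Math intrinsics reduce to
// a direct call to the corresponding quick entrypoint.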
static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyway.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }
  LocationSummary* locations =
      new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
      (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSB/MOVSW/MOVSL, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

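// Emits the range checks for one side of an arraycopy: `pos >= 0`, `pos <= input.length`
// and `input.length - pos >= length`, jumping to `slow_path` when any of them fails.
// When `length_is_input_length` is true, the copy length is known to equal the input's
// length, so the copy can only succeed if `pos` is zero.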
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

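// Shared code generation for the primitive System.arraycopy intrinsics (byte, char, int).
// After the null, aliasing and bounds checks, the copy itself is a REP MOVSB/MOVSW/MOVSL,
// which is why the locations pin the temporaries to RSI (source), RDI (destination) and
// RCX (count).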
static void SystemArrayCopyPrimitive(HInvoke* invoke,
                                     X86_64Assembler* assembler,
                                     CodeGeneratorX86_64* codegen,
                                     DataType::Type type) {
  LocationSummary* locations = invoke->GetLocations();
  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSB/W/L.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out. Finally time to do the copy.
  // The element size and scale factor used below are derived from the copied type.
  const size_t data_size = DataType::Size(type);
  const ScaleFactor scale_factor = CodeGenerator::ScaleFactorForType(type);
  const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, data_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, data_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base,
            Address(dest, dest_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  // Do the move.
  switch (type) {
    case DataType::Type::kInt8:
      __ rep_movsb();
      break;
    case DataType::Type::kUint16:
      __ rep_movsw();
      break;
    case DataType::Type::kInt32:
      __ rep_movsl();
      break;
    default:
      LOG(FATAL) << "Unexpected data type for intrinsic";
  }
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
  CreateSystemArrayCopyLocations(invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (gUseReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        DataType::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, element_size * constant + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (dst_pos.IsConstant()) {
    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
  } else {
    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_end, Address(src_base, element_size * constant));
  } else {
    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
  }
}

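// Code generation for the reference System.arraycopy intrinsic: after the position,
// nullness, length and (when needed) element-type checks, the elements are copied with a
// simple word-copy loop; with Baker read barriers enabled, a gray source object diverts
// the copy to ReadBarrierSystemArrayCopySlowPathX86_64.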
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);

  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Location temp1_loc = locations->GetTemp(0);
  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
  Location temp2_loc = locations->GetTemp(1);
  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
  Location temp3_loc = locations->GetTemp(2);
  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
  Location TMP_loc = Location::RegisterLocation(TMP);

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    bool did_unpoison = false;
    if (gUseReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
      // Register `temp1` is not trashed by the read barrier emitted
      // by GenerateFieldLoadWithBakerReadBarrier below, as that
      // method produces a call to a ReadBarrierMarkRegX entry point,
      // which saves all potentially live registers, including
      // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
      // If heap poisoning is enabled, `temp1` and `temp2` have been
      // unpoisoned by the previous calls to
      // GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      // /* HeapReference<Class> */ temp2 = src->klass_
      __ movl(temp2, Address(src, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
          !optimizations.GetSourceIsNonPrimitiveArray()) {
        // One or two of the references need to be unpoisoned. Unpoison them
        // both to make the identity check valid.
        __ MaybeUnpoisonHeapReference(temp1);
        __ MaybeUnpoisonHeapReference(temp2);
        did_unpoison = true;
      }
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      if (gUseReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      if (gUseReadBarrier && kUseBakerReadBarrier) {
        // For the same reason given earlier, `temp1` is not trashed by the
        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (gUseReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      } else {
        if (!did_unpoison) {
          __ MaybeUnpoisonHeapReference(temp1);
        }
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // No need to unpoison the following heap reference load, as
        // we're comparing against null.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      }
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    if (gUseReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      // No need to unpoison `TMP` now, as we're comparing against null.
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    }
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);

  // Compute base source address, base destination address, and end
  // source address in `temp1`, `temp2` and `temp3` respectively.
  GenSystemArrayCopyAddresses(
      GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);

  if (gUseReadBarrier && kUseBakerReadBarrier) {
    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       do {
    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
    //       } while (src_ptr != end_ptr)
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.

    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

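// String.compareTo(String) is implemented as a runtime call rather than inline code: after
// an explicit null check on the argument (the receiver's null check is guaranteed by the
// caller), it is forwarded to the kQuickStringCompareTo entrypoint, with the result in RAX.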
VisitStringCompareTo(HInvoke * invoke)1232 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1233 LocationSummary* locations = new (allocator_) LocationSummary(
1234 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1235 InvokeRuntimeCallingConvention calling_convention;
1236 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1237 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1238 locations->SetOut(Location::RegisterLocation(RAX));
1239 }
1240
VisitStringCompareTo(HInvoke * invoke)1241 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1242 X86_64Assembler* assembler = GetAssembler();
1243 LocationSummary* locations = invoke->GetLocations();
1244
1245 // Note that the null check must have been done earlier.
1246 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1247
1248 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1249 __ testl(argument, argument);
1250 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1251 codegen_->AddSlowPath(slow_path);
1252 __ j(kEqual, slow_path->GetEntryLabel());
1253
1254 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1255 __ Bind(slow_path->GetExitLabel());
1256 }
1257
VisitStringEquals(HInvoke * invoke)1258 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1259 LocationSummary* locations =
1260 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1261 locations->SetInAt(0, Location::RequiresRegister());
1262 locations->SetInAt(1, Location::RequiresRegister());
1263
1264 // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
1265 locations->AddTemp(Location::RegisterLocation(RCX));
1266 locations->AddTemp(Location::RegisterLocation(RDI));
1267
1268 // Set output, RSI needed for repe_cmpsq instruction anyways.
1269 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1270 }
1271
VisitStringEquals(HInvoke * invoke)1272 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1273 X86_64Assembler* assembler = GetAssembler();
1274 LocationSummary* locations = invoke->GetLocations();
1275
1276 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1277 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1278 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1279 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1280 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1281
1282 NearLabel end, return_true, return_false;
1283
1284 // Get offsets of count, value, and class fields within a string object.
1285 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1286 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1287 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1288
1289 // Note that the null check must have been done earlier.
1290 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1291
1292 StringEqualsOptimizations optimizations(invoke);
1293 if (!optimizations.GetArgumentNotNull()) {
1294 // Check if input is null, return false if it is.
1295 __ testl(arg, arg);
1296 __ j(kEqual, &return_false);
1297 }
1298
1299 if (!optimizations.GetArgumentIsString()) {
1300 // Instanceof check for the argument by comparing class fields.
1301 // All string objects must have the same type since String cannot be subclassed.
1302 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1303 // If the argument is a string object, its class field must be equal to receiver's class field.
1304 //
1305 // As the String class is expected to be non-movable, we can read the class
1306 // field from String.equals' arguments without read barriers.
1307 AssertNonMovableStringClass();
1308 // Also, because we use the loaded class references only to compare them, we
1309 // don't need to unpoison them.
1310 // /* HeapReference<Class> */ rcx = str->klass_
1311 __ movl(rcx, Address(str, class_offset));
1312 // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1313 __ cmpl(rcx, Address(arg, class_offset));
1314 __ j(kNotEqual, &return_false);
1315 }
1316
1317 // Reference equality check, return true if same reference.
1318 __ cmpl(str, arg);
1319 __ j(kEqual, &return_true);
1320
1321 // Load length and compression flag of receiver string.
1322 __ movl(rcx, Address(str, count_offset));
1323 // Check if lengths and compression flags are equal; return false if they're not.
1324 // Two identical strings will always have same compression style since
1325 // compression style is decided on alloc.
1326 __ cmpl(rcx, Address(arg, count_offset));
1327 __ j(kNotEqual, &return_false);
1328 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1329 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1330 "Expecting 0=compressed, 1=uncompressed");
1331 __ jrcxz(&return_true);
1332
1333 if (mirror::kUseStringCompression) {
1334 NearLabel string_uncompressed;
1335 // Extract the length and distinguish between the both-compressed and both-uncompressed cases.
1336 // Strings with different compression styles were already rejected above.
1337 __ shrl(rcx, Immediate(1));
1338 __ j(kCarrySet, &string_uncompressed);
1339 // Divide string length by 2, rounding up, and continue as if uncompressed.
1340 // Merge clearing the compression flag with +1 for rounding.
1341 __ addl(rcx, Immediate(1));
1342 __ shrl(rcx, Immediate(1));
1343 __ Bind(&string_uncompressed);
1344 }
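// At this point RCX holds the number of 16-bit units to compare: the char count for
// uncompressed strings, or ceil(byte length / 2) for compressed ones. Either way the
// ceil-divide by 4 below yields ceil(bytes / 8) quadwords, e.g. a 5-byte compressed
// string gives ceil(5/2) = 3 units and then ceil(3/4) = 1 quadword; the 8-byte zero
// padding asserted below makes the trailing over-read compare equal for both strings.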
1345 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1346 __ leal(rsi, Address(str, value_offset));
1347 __ leal(rdi, Address(arg, value_offset));
1348
1349 // Divide string length by 4 and adjust for lengths not divisible by 4.
1350 __ addl(rcx, Immediate(3));
1351 __ shrl(rcx, Immediate(2));
1352
1353 // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1354 // or 8 characters (compressed) at a time.
1355 DCHECK_ALIGNED(value_offset, 8);
1356 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1357
1358 // Loop to compare strings four characters at a time starting at the beginning of the string.
1359 __ repe_cmpsq();
1360 // If strings are not equal, zero flag will be cleared.
1361 __ j(kNotEqual, &return_false);
1362
1363 // Return true and exit the function.
1364 // If loop does not result in returning false, we return true.
1365 __ Bind(&return_true);
1366 __ movl(rsi, Immediate(1));
1367 __ jmp(&end);
1368
1369 // Return false and exit the function.
1370 __ Bind(&return_false);
1371 __ xorl(rsi, rsi);
1372 __ Bind(&end);
1373 }
1374
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1375 static void CreateStringIndexOfLocations(HInvoke* invoke,
1376 ArenaAllocator* allocator,
1377 bool start_at_zero) {
1378 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1379 LocationSummary::kCallOnSlowPath,
1380 kIntrinsified);
1381 // The data needs to be in RDI for scasw. So request that the string is there, anyway.
1382 locations->SetInAt(0, Location::RegisterLocation(RDI));
1383 // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1384 // allocator to do that anyway. We can still do the constant check by checking the parameter
1385 // of the instruction explicitly.
1386 // Note: This works as we don't clobber RAX anywhere.
1387 locations->SetInAt(1, Location::RegisterLocation(RAX));
1388 if (!start_at_zero) {
1389 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1390 }
1391 // As we clobber RDI during execution anyway, also use it as the output.
1392 locations->SetOut(Location::SameAsFirstInput());
1393
1394 // repne scasw uses RCX as the counter.
1395 locations->AddTemp(Location::RegisterLocation(RCX));
1396 // Need another temporary to be able to compute the result.
1397 locations->AddTemp(Location::RequiresRegister());
1398 }
1399
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1400 static void GenerateStringIndexOf(HInvoke* invoke,
1401 X86_64Assembler* assembler,
1402 CodeGeneratorX86_64* codegen,
1403 bool start_at_zero) {
1404 LocationSummary* locations = invoke->GetLocations();
1405
1406 // Note that the null check must have been done earlier.
1407 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1408
1409 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1410 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1411 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1412 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1413 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1414
1415 // Check our assumptions for registers.
1416 DCHECK_EQ(string_obj.AsRegister(), RDI);
1417 DCHECK_EQ(search_value.AsRegister(), RAX);
1418 DCHECK_EQ(counter.AsRegister(), RCX);
1419 DCHECK_EQ(out.AsRegister(), RDI);
1420
1421 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1422 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1423 SlowPathCode* slow_path = nullptr;
1424 HInstruction* code_point = invoke->InputAt(1);
1425 if (code_point->IsIntConstant()) {
1426 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1427 std::numeric_limits<uint16_t>::max()) {
1428 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1429 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1430 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1431 codegen->AddSlowPath(slow_path);
1432 __ jmp(slow_path->GetEntryLabel());
1433 __ Bind(slow_path->GetExitLabel());
1434 return;
1435 }
1436 } else if (code_point->GetType() != DataType::Type::kUint16) {
1437 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1438 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1439 codegen->AddSlowPath(slow_path);
1440 __ j(kAbove, slow_path->GetEntryLabel());
1441 }
1442
1443 // From here down, we know that we are looking for a char that fits in
1444 // 16 bits (uncompressed) or 8 bits (compressed).
1445 // Location of reference to data array within the String object.
1446 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1447 // Location of count within the String object.
1448 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1449
1450 // Load the count field of the string containing the length and compression flag.
1451 __ movl(string_length, Address(string_obj, count_offset));
1452
1453 // Do a zero-length check. Even with string compression `count == 0` means empty.
1454 // TODO: Support jecxz.
1455 NearLabel not_found_label;
1456 __ testl(string_length, string_length);
1457 __ j(kEqual, &not_found_label);
1458
1459 if (mirror::kUseStringCompression) {
1460 // Use TMP to keep string_length_flagged.
1461 __ movl(CpuRegister(TMP), string_length);
1462 // Shift out the low bit, which holds the compression flag.
1463 __ shrl(string_length, Immediate(1));
1464 }
1465
1466 if (start_at_zero) {
1467 // Number of chars to scan is the same as the string length.
1468 __ movl(counter, string_length);
1469 // Move to the start of the string.
1470 __ addq(string_obj, Immediate(value_offset));
1471 } else {
1472 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1473
1474 // Do a start_index check.
1475 __ cmpl(start_index, string_length);
1476 __ j(kGreaterEqual, &not_found_label);
1477
1478 // Ensure we have a start index >= 0.
1479 __ xorl(counter, counter);
1480 __ cmpl(start_index, Immediate(0));
1481 __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
1482
1483 if (mirror::kUseStringCompression) {
1484 NearLabel modify_counter, offset_uncompressed_label;
1485 __ testl(CpuRegister(TMP), Immediate(1));
1486 __ j(kNotZero, &offset_uncompressed_label);
1487 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1488 __ jmp(&modify_counter);
1489 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1490 __ Bind(&offset_uncompressed_label);
1491 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1492 __ Bind(&modify_counter);
1493 } else {
1494 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1495 }
1496 // Now update RCX, the work counter: it will be string.length - start_index.
1497 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1498 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1499 }
1500
1501 if (mirror::kUseStringCompression) {
1502 NearLabel uncompressed_string_comparison;
1503 NearLabel comparison_done;
1504 __ testl(CpuRegister(TMP), Immediate(1));
1505 __ j(kNotZero, &uncompressed_string_comparison);
1506 // Check if RAX (search_value) is ASCII.
1507 __ cmpl(search_value, Immediate(127));
1508 __ j(kGreater, &not_found_label);
1509 // Compare byte by byte.
1510 __ repne_scasb();
1511 __ jmp(&comparison_done);
1512 // Everything is set up for repne scasw:
1513 // * Comparison address in RDI.
1514 // * Counter in ECX.
1515 __ Bind(&uncompressed_string_comparison);
1516 __ repne_scasw();
1517 __ Bind(&comparison_done);
1518 } else {
1519 __ repne_scasw();
1520 }
1521 // Did we find a match?
1522 __ j(kNotEqual, &not_found_label);
1523
1524 // Yes, we matched. Compute the index of the result.
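// repne scas{b,w} decrements RCX for every element it examines, including the matching
// one, so the match index is string_length - remaining_count - 1; the same formula holds
// when the scan started at start_index, since the counter began at length - start_index.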
1525 __ subl(string_length, counter);
1526 __ leal(out, Address(string_length, -1));
1527
1528 NearLabel done;
1529 __ jmp(&done);
1530
1531 // Failed to match; return -1.
1532 __ Bind(&not_found_label);
1533 __ movl(out, Immediate(-1));
1534
1535 // And join up at the end.
1536 __ Bind(&done);
1537 if (slow_path != nullptr) {
1538 __ Bind(slow_path->GetExitLabel());
1539 }
1540 }
1541
VisitStringIndexOf(HInvoke * invoke)1542 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1543 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1544 }
1545
VisitStringIndexOf(HInvoke * invoke)1546 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1547 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1548 }
1549
VisitStringIndexOfAfter(HInvoke * invoke)1550 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1551 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1552 }
1553
VisitStringIndexOfAfter(HInvoke * invoke)1554 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1555 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1556 }
1557
VisitStringNewStringFromBytes(HInvoke * invoke)1558 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1559 LocationSummary* locations = new (allocator_) LocationSummary(
1560 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1561 InvokeRuntimeCallingConvention calling_convention;
1562 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1563 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1564 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1565 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1566 locations->SetOut(Location::RegisterLocation(RAX));
1567 }
1568
VisitStringNewStringFromBytes(HInvoke * invoke)1569 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1570 X86_64Assembler* assembler = GetAssembler();
1571 LocationSummary* locations = invoke->GetLocations();
1572
1573 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1574 __ testl(byte_array, byte_array);
1575 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1576 codegen_->AddSlowPath(slow_path);
1577 __ j(kEqual, slow_path->GetEntryLabel());
1578
1579 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1580 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1581 __ Bind(slow_path->GetExitLabel());
1582 }
1583
VisitStringNewStringFromChars(HInvoke * invoke)1584 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1585 LocationSummary* locations =
1586 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1587 InvokeRuntimeCallingConvention calling_convention;
1588 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1589 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1590 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1591 locations->SetOut(Location::RegisterLocation(RAX));
1592 }
1593
VisitStringNewStringFromChars(HInvoke * invoke)1594 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1595 // No need to emit code checking whether `locations->InAt(2)` is a null
1596 // pointer, as callers of the native method
1597 //
1598 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1599 //
1600 // all include a null check on `data` before calling that method.
1601 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1602 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1603 }
1604
VisitStringNewStringFromString(HInvoke * invoke)1605 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1606 LocationSummary* locations = new (allocator_) LocationSummary(
1607 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1608 InvokeRuntimeCallingConvention calling_convention;
1609 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1610 locations->SetOut(Location::RegisterLocation(RAX));
1611 }
1612
VisitStringNewStringFromString(HInvoke * invoke)1613 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1614 X86_64Assembler* assembler = GetAssembler();
1615 LocationSummary* locations = invoke->GetLocations();
1616
1617 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1618 __ testl(string_to_copy, string_to_copy);
1619 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1620 codegen_->AddSlowPath(slow_path);
1621 __ j(kEqual, slow_path->GetEntryLabel());
1622
1623 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1624 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1625 __ Bind(slow_path->GetExitLabel());
1626 }
1627
VisitStringGetCharsNoCheck(HInvoke * invoke)1628 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1629 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1630 LocationSummary* locations =
1631 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1632 locations->SetInAt(0, Location::RequiresRegister());
1633 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1634 locations->SetInAt(2, Location::RequiresRegister());
1635 locations->SetInAt(3, Location::RequiresRegister());
1636 locations->SetInAt(4, Location::RequiresRegister());
1637
1638 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1639 locations->AddTemp(Location::RegisterLocation(RSI));
1640 locations->AddTemp(Location::RegisterLocation(RDI));
1641 locations->AddTemp(Location::RegisterLocation(RCX));
1642 }
1643
VisitStringGetCharsNoCheck(HInvoke * invoke)1644 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1645 X86_64Assembler* assembler = GetAssembler();
1646 LocationSummary* locations = invoke->GetLocations();
1647
1648 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1649 // Location of data in char array buffer.
1650 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1651 // Location of char array data in string.
1652 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1653
1654 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1655 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1656 Location srcBegin = locations->InAt(1);
1657 int srcBegin_value =
1658 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1659 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1660 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1661 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1662
1663 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1664 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1665 DCHECK_EQ(char_size, 2u);
1666
1667 NearLabel done;
1668 // Compute the number of chars (words) to move.
1669 __ movl(CpuRegister(RCX), srcEnd);
1670 if (srcBegin.IsConstant()) {
1671 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1672 } else {
1673 DCHECK(srcBegin.IsRegister());
1674 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1675 }
1676 if (mirror::kUseStringCompression) {
1677 NearLabel copy_uncompressed, copy_loop;
1678 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1679 DCHECK_EQ(c_char_size, 1u);
1680 // Location of count in string.
1681 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1682
1683 __ testl(Address(obj, count_offset), Immediate(1));
1684 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1685 "Expecting 0=compressed, 1=uncompressed");
1686 __ j(kNotZero, &copy_uncompressed);
1687 // Compute the address of the source string by adding the number of chars from
1688 // the source beginning to the value offset of a string.
1689 __ leaq(CpuRegister(RSI),
1690 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1691 // Compute the address of the destination buffer before starting the copy loop.
1692 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1693
1694 __ Bind(&copy_loop);
1695 __ jrcxz(&done);
1696 // Use TMP as temporary (convert byte from RSI to word).
1697 // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1698 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1699 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1700 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1701 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1702 // TODO: Add support for LOOP to X86_64Assembler.
1703 __ subl(CpuRegister(RCX), Immediate(1));
1704 __ jmp(&copy_loop);
1705
1706 __ Bind(&copy_uncompressed);
1707 }
1708
1709 __ leaq(CpuRegister(RSI),
1710 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1711 // Compute the address of the destination buffer.
1712 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1713 // Do the move.
1714 __ rep_movsw();
1715
1716 __ Bind(&done);
1717 }
1718
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1719 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1720 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1721 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1722 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1723 // to avoid a SIGBUS.
1724 switch (size) {
1725 case DataType::Type::kInt8:
1726 __ movsxb(out, Address(address, 0));
1727 break;
1728 case DataType::Type::kInt16:
1729 __ movsxw(out, Address(address, 0));
1730 break;
1731 case DataType::Type::kInt32:
1732 __ movl(out, Address(address, 0));
1733 break;
1734 case DataType::Type::kInt64:
1735 __ movq(out, Address(address, 0));
1736 break;
1737 default:
1738 LOG(FATAL) << "Type not recognized for peek: " << size;
1739 UNREACHABLE();
1740 }
1741 }
1742
VisitMemoryPeekByte(HInvoke * invoke)1743 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1744 CreateIntToIntLocations(allocator_, invoke);
1745 }
1746
VisitMemoryPeekByte(HInvoke * invoke)1747 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1748 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1749 }
1750
VisitMemoryPeekIntNative(HInvoke * invoke)1751 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1752 CreateIntToIntLocations(allocator_, invoke);
1753 }
1754
VisitMemoryPeekIntNative(HInvoke * invoke)1755 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1756 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1757 }
1758
VisitMemoryPeekLongNative(HInvoke * invoke)1759 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1760 CreateIntToIntLocations(allocator_, invoke);
1761 }
1762
VisitMemoryPeekLongNative(HInvoke * invoke)1763 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1764 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1765 }
1766
VisitMemoryPeekShortNative(HInvoke * invoke)1767 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1768 CreateIntToIntLocations(allocator_, invoke);
1769 }
1770
VisitMemoryPeekShortNative(HInvoke * invoke)1771 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1772 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1773 }
1774
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1775 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1776 LocationSummary* locations =
1777 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1778 locations->SetInAt(0, Location::RequiresRegister());
1779 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1780 }
1781
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1782 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1783 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1784 Location value = locations->InAt(1);
1785 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1786 // to avoid a SIGBUS.
1787 switch (size) {
1788 case DataType::Type::kInt8:
1789 if (value.IsConstant()) {
1790 __ movb(Address(address, 0),
1791 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1792 } else {
1793 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1794 }
1795 break;
1796 case DataType::Type::kInt16:
1797 if (value.IsConstant()) {
1798 __ movw(Address(address, 0),
1799 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1800 } else {
1801 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1802 }
1803 break;
1804 case DataType::Type::kInt32:
1805 if (value.IsConstant()) {
1806 __ movl(Address(address, 0),
1807 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1808 } else {
1809 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1810 }
1811 break;
1812 case DataType::Type::kInt64:
1813 if (value.IsConstant()) {
1814 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1815 DCHECK(IsInt<32>(v));
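// A 64-bit store of an immediate only takes a sign-extended 32-bit value, and the
// locations builder used RegisterOrInt32Constant, so the constant is known to fit.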
1816 int32_t v_32 = v;
1817 __ movq(Address(address, 0), Immediate(v_32));
1818 } else {
1819 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1820 }
1821 break;
1822 default:
1823 LOG(FATAL) << "Type not recognized for poke: " << size;
1824 UNREACHABLE();
1825 }
1826 }
1827
VisitMemoryPokeByte(HInvoke * invoke)1828 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1829 CreateIntIntToVoidLocations(allocator_, invoke);
1830 }
1831
VisitMemoryPokeByte(HInvoke * invoke)1832 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1833 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1834 }
1835
VisitMemoryPokeIntNative(HInvoke * invoke)1836 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1837 CreateIntIntToVoidLocations(allocator_, invoke);
1838 }
1839
VisitMemoryPokeIntNative(HInvoke * invoke)1840 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1841 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1842 }
1843
VisitMemoryPokeLongNative(HInvoke * invoke)1844 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1845 CreateIntIntToVoidLocations(allocator_, invoke);
1846 }
1847
VisitMemoryPokeLongNative(HInvoke * invoke)1848 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1849 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1850 }
1851
VisitMemoryPokeShortNative(HInvoke * invoke)1852 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1853 CreateIntIntToVoidLocations(allocator_, invoke);
1854 }
1855
VisitMemoryPokeShortNative(HInvoke * invoke)1856 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1857 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1858 }
1859
VisitThreadCurrentThread(HInvoke * invoke)1860 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1861 LocationSummary* locations =
1862 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1863 locations->SetOut(Location::RequiresRegister());
1864 }
1865
VisitThreadCurrentThread(HInvoke * invoke)1866 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1867 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1868 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1869 /* no_rip= */ true));
1870 }
1871
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile ATTRIBUTE_UNUSED,CodeGeneratorX86_64 * codegen)1872 static void GenUnsafeGet(HInvoke* invoke,
1873 DataType::Type type,
1874 bool is_volatile ATTRIBUTE_UNUSED,
1875 CodeGeneratorX86_64* codegen) {
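// Note: `is_volatile` is unused because x86-64 loads already have acquire semantics
// (loads are not reordered with later loads or stores), so no fence is needed here.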
1876 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1877 LocationSummary* locations = invoke->GetLocations();
1878 Location base_loc = locations->InAt(1);
1879 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1880 Location offset_loc = locations->InAt(2);
1881 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1882 Location output_loc = locations->Out();
1883 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1884
1885 switch (type) {
1886 case DataType::Type::kInt32:
1887 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1888 break;
1889
1890 case DataType::Type::kReference: {
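// Reference loads may need a read barrier: with Baker read barriers the load and the
// mark are generated together; the generic read barrier path loads first and then emits
// the slow-path barrier; without read barriers we simply load and unpoison.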
1891 if (gUseReadBarrier) {
1892 if (kUseBakerReadBarrier) {
1893 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1894 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1895 invoke, output_loc, base, src, /* needs_null_check= */ false);
1896 } else {
1897 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1898 codegen->GenerateReadBarrierSlow(
1899 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1900 }
1901 } else {
1902 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1903 __ MaybeUnpoisonHeapReference(output);
1904 }
1905 break;
1906 }
1907
1908 case DataType::Type::kInt64:
1909 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1910 break;
1911
1912 default:
1913 LOG(FATAL) << "Unsupported op size " << type;
1914 UNREACHABLE();
1915 }
1916 }
1917
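// Returns true for the Unsafe getters that load references; those are the only getters
// that may need to call into the runtime, for the read barrier slow path.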
UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic)1918 static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
1919 switch (intrinsic) {
1920 case Intrinsics::kUnsafeGetObject:
1921 case Intrinsics::kUnsafeGetObjectVolatile:
1922 case Intrinsics::kJdkUnsafeGetObject:
1923 case Intrinsics::kJdkUnsafeGetObjectVolatile:
1924 case Intrinsics::kJdkUnsafeGetObjectAcquire:
1925 return true;
1926 default:
1927 break;
1928 }
1929 return false;
1930 }
1931
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)1932 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1933 bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
1934 LocationSummary* locations =
1935 new (allocator) LocationSummary(invoke,
1936 can_call
1937 ? LocationSummary::kCallOnSlowPath
1938 : LocationSummary::kNoCall,
1939 kIntrinsified);
1940 if (can_call && kUseBakerReadBarrier) {
1941 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1942 }
1943 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1944 locations->SetInAt(1, Location::RequiresRegister());
1945 locations->SetInAt(2, Location::RequiresRegister());
1946 locations->SetOut(Location::RequiresRegister(),
1947 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1948 }
1949
VisitUnsafeGet(HInvoke * invoke)1950 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1951 VisitJdkUnsafeGet(invoke);
1952 }
VisitUnsafeGetVolatile(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1954 VisitJdkUnsafeGetVolatile(invoke);
1955 }
VisitUnsafeGetLong(HInvoke * invoke)1956 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1957 VisitJdkUnsafeGetLong(invoke);
1958 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1959 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1960 VisitJdkUnsafeGetLongVolatile(invoke);
1961 }
VisitUnsafeGetObject(HInvoke * invoke)1962 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1963 VisitJdkUnsafeGetObject(invoke);
1964 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1965 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1966 VisitJdkUnsafeGetObjectVolatile(invoke);
1967 }
1968
VisitJdkUnsafeGet(HInvoke * invoke)1969 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1970 CreateIntIntIntToIntLocations(allocator_, invoke);
1971 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1972 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1973 CreateIntIntIntToIntLocations(allocator_, invoke);
1974 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1975 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1976 CreateIntIntIntToIntLocations(allocator_, invoke);
1977 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1978 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1979 CreateIntIntIntToIntLocations(allocator_, invoke);
1980 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1981 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1982 CreateIntIntIntToIntLocations(allocator_, invoke);
1983 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1984 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1985 CreateIntIntIntToIntLocations(allocator_, invoke);
1986 }
VisitJdkUnsafeGetObject(HInvoke * invoke)1987 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1988 CreateIntIntIntToIntLocations(allocator_, invoke);
1989 }
VisitJdkUnsafeGetObjectVolatile(HInvoke * invoke)1990 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1991 CreateIntIntIntToIntLocations(allocator_, invoke);
1992 }
VisitJdkUnsafeGetObjectAcquire(HInvoke * invoke)1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1994 CreateIntIntIntToIntLocations(allocator_, invoke);
1995 }
1996
1997
VisitUnsafeGet(HInvoke * invoke)1998 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1999 VisitJdkUnsafeGet(invoke);
2000 }
VisitUnsafeGetVolatile(HInvoke * invoke)2001 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2002 VisitJdkUnsafeGetVolatile(invoke);
2003 }
VisitUnsafeGetLong(HInvoke * invoke)2004 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2005 VisitJdkUnsafeGetLong(invoke);
2006 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2007 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2008 VisitJdkUnsafeGetLongVolatile(invoke);
2009 }
VisitUnsafeGetObject(HInvoke * invoke)2010 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2011 VisitJdkUnsafeGetObject(invoke);
2012 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2013 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2014 VisitJdkUnsafeGetObjectVolatile(invoke);
2015 }
2016
VisitJdkUnsafeGet(HInvoke * invoke)2017 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2018 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2019 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2020 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2021 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2022 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2023 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2024 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2025 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2026 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2027 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2028 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2029 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2030 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2031 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2032 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2033 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2034 }
VisitJdkUnsafeGetObject(HInvoke * invoke)2035 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
2036 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2037 }
VisitJdkUnsafeGetObjectVolatile(HInvoke * invoke)2038 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
2039 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2040 }
VisitJdkUnsafeGetObjectAcquire(HInvoke * invoke)2041 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
2042 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2043 }
2044
2045
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2046 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2047 DataType::Type type,
2048 HInvoke* invoke) {
2049 LocationSummary* locations =
2050 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2051 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2052 locations->SetInAt(1, Location::RequiresRegister());
2053 locations->SetInAt(2, Location::RequiresRegister());
2054 locations->SetInAt(3, Location::RequiresRegister());
2055 if (type == DataType::Type::kReference) {
2056 // Need temp registers for card-marking.
2057 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2058 locations->AddTemp(Location::RequiresRegister());
2059 }
2060 }
2061
VisitUnsafePut(HInvoke * invoke)2062 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2063 VisitJdkUnsafePut(invoke);
2064 }
VisitUnsafePutOrdered(HInvoke * invoke)2065 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2066 VisitJdkUnsafePutOrdered(invoke);
2067 }
VisitUnsafePutVolatile(HInvoke * invoke)2068 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2069 VisitJdkUnsafePutVolatile(invoke);
2070 }
VisitUnsafePutObject(HInvoke * invoke)2071 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2072 VisitJdkUnsafePutObject(invoke);
2073 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2074 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2075 VisitJdkUnsafePutObjectOrdered(invoke);
2076 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2077 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2078 VisitJdkUnsafePutObjectVolatile(invoke);
2079 }
VisitUnsafePutLong(HInvoke * invoke)2080 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2081 VisitJdkUnsafePutLong(invoke);
2082 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2083 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2084 VisitJdkUnsafePutLongOrdered(invoke);
2085 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2086 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2087 VisitJdkUnsafePutLongVolatile(invoke);
2088 }
2089
VisitJdkUnsafePut(HInvoke * invoke)2090 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2091 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2092 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2093 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2094 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2095 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2096 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2097 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2098 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2099 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2100 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2101 }
VisitJdkUnsafePutObject(HInvoke * invoke)2102 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
2103 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2104 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2105 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2106 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2107 }
VisitJdkUnsafePutObjectVolatile(HInvoke * invoke)2108 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2109 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2110 }
VisitJdkUnsafePutObjectRelease(HInvoke * invoke)2111 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2112 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2113 }
VisitJdkUnsafePutLong(HInvoke * invoke)2114 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2115 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2116 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2117 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2118 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2119 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2120 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2121 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2122 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2123 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2124 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2125 }
2126
2127 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2128 // memory model.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2129 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2130 CodeGeneratorX86_64* codegen) {
2131 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2132 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2133 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2134 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2135
2136 if (type == DataType::Type::kInt64) {
2137 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2138 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2139 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2140 __ movl(temp, value);
2141 __ PoisonHeapReference(temp);
2142 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2143 } else {
2144 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2145 }
2146
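// A volatile store additionally needs a StoreLoad barrier after the write; plain and
// ordered stores are already covered by the x86-64 memory model (see the comment above).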
2147 if (is_volatile) {
2148 codegen->MemoryFence();
2149 }
2150
2151 if (type == DataType::Type::kReference) {
2152 bool value_can_be_null = true; // TODO: Worth finding out this information?
2153 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2154 locations->GetTemp(1).AsRegister<CpuRegister>(),
2155 base,
2156 value,
2157 value_can_be_null);
2158 }
2159 }
2160
VisitUnsafePut(HInvoke * invoke)2161 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2162 VisitJdkUnsafePut(invoke);
2163 }
VisitUnsafePutOrdered(HInvoke * invoke)2164 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2165 VisitJdkUnsafePutOrdered(invoke);
2166 }
VisitUnsafePutVolatile(HInvoke * invoke)2167 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2168 VisitJdkUnsafePutVolatile(invoke);
2169 }
VisitUnsafePutObject(HInvoke * invoke)2170 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2171 VisitJdkUnsafePutObject(invoke);
2172 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2173 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2174 VisitJdkUnsafePutObjectOrdered(invoke);
2175 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2176 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2177 VisitJdkUnsafePutObjectVolatile(invoke);
2178 }
VisitUnsafePutLong(HInvoke * invoke)2179 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2180 VisitJdkUnsafePutLong(invoke);
2181 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2182 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2183 VisitJdkUnsafePutLongOrdered(invoke);
2184 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2185 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2186 VisitJdkUnsafePutLongVolatile(invoke);
2187 }
2188
VisitJdkUnsafePut(HInvoke * invoke)2189 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2190 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2191 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2192 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2193 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2194 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2195 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2196 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2197 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2198 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2199 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2200 }
VisitJdkUnsafePutObject(HInvoke * invoke)2201 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
2202 GenUnsafePut(
2203 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2204 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2205 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2206 GenUnsafePut(
2207 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2208 }
VisitJdkUnsafePutObjectVolatile(HInvoke * invoke)2209 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2210 GenUnsafePut(
2211 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2212 }
VisitJdkUnsafePutObjectRelease(HInvoke * invoke)2213 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2214 GenUnsafePut(
2215 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2216 }
VisitJdkUnsafePutLong(HInvoke * invoke)2217 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2218 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2219 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2220 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2221 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2222 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2223 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2224 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2225 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2226 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2227 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2228 }
2229
CreateUnsafeCASLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2230 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2231 DataType::Type type,
2232 HInvoke* invoke) {
2233 const bool can_call = gUseReadBarrier &&
2234 kUseBakerReadBarrier &&
2235 IsUnsafeCASObject(invoke);
2236 LocationSummary* locations =
2237 new (allocator) LocationSummary(invoke,
2238 can_call
2239 ? LocationSummary::kCallOnSlowPath
2240 : LocationSummary::kNoCall,
2241 kIntrinsified);
2242 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2243 locations->SetInAt(1, Location::RequiresRegister());
2244 locations->SetInAt(2, Location::RequiresRegister());
2245 // Expected value must be in EAX/RAX.
2246 locations->SetInAt(3, Location::RegisterLocation(RAX));
2247 locations->SetInAt(4, Location::RequiresRegister());
2248
2249 // RAX is clobbered in CMPXCHG, but we set it as out so no need to add it as temporary.
2250 locations->SetOut(Location::RegisterLocation(RAX));
2251
2252 if (type == DataType::Type::kReference) {
2253 // Need two temporaries for MarkGCCard.
2254 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2255 locations->AddTemp(Location::RequiresRegister());
2256 if (gUseReadBarrier) {
2257 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2258 DCHECK(kUseBakerReadBarrier);
2259 locations->AddTemp(Location::RequiresRegister());
2260 }
2261 }
2262 }
2263
VisitUnsafeCASInt(HInvoke * invoke)2264 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2265 VisitJdkUnsafeCASInt(invoke);
2266 }
2267
VisitUnsafeCASLong(HInvoke * invoke)2268 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2269 VisitJdkUnsafeCASLong(invoke);
2270 }
2271
VisitUnsafeCASObject(HInvoke * invoke)2272 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2273 VisitJdkUnsafeCASObject(invoke);
2274 }
2275
VisitJdkUnsafeCASInt(HInvoke * invoke)2276 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2277 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2278 VisitJdkUnsafeCompareAndSetInt(invoke);
2279 }
2280
VisitJdkUnsafeCASLong(HInvoke * invoke)2281 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2282 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2283 VisitJdkUnsafeCompareAndSetLong(invoke);
2284 }
2285
VisitJdkUnsafeCASObject(HInvoke * invoke)2286 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2287 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2288 VisitJdkUnsafeCompareAndSetObject(invoke);
2289 }
2290
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2291 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2292 CreateUnsafeCASLocations(allocator_, DataType::Type::kInt32, invoke);
2293 }
2294
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2295 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2296 CreateUnsafeCASLocations(allocator_, DataType::Type::kInt64, invoke);
2297 }
2298
VisitJdkUnsafeCompareAndSetObject(HInvoke * invoke)2299 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2300 // The only supported read barrier implementation is the Baker-style read barriers.
2301 if (gUseReadBarrier && !kUseBakerReadBarrier) {
2302 return;
2303 }
2304
2305 CreateUnsafeCASLocations(allocator_, DataType::Type::kReference, invoke);
2306 }
2307
2308 // Convert ZF into the Boolean result.
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2309 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2310 __ setcc(kZero, out);
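// setcc only writes the low byte of `out`, so zero-extend it to get a clean 0/1 value.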
2311 __ movzxb(out, out);
2312 }
2313
2314 // This function assumes that expected value for CMPXCHG and output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2315 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2316 DataType::Type type,
2317 Address field_addr,
2318 Location value,
2319 bool is_cmpxchg,
2320 bool byte_swap) {
2321 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2322 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2323
2324 if (byte_swap) {
2325 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2326 instr_codegen->Bswap(value, type);
2327 }
2328
2329 switch (type) {
2330 case DataType::Type::kBool:
2331 case DataType::Type::kInt8:
2332 __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2333 break;
2334 case DataType::Type::kInt16:
2335 case DataType::Type::kUint16:
2336 __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2337 break;
2338 case DataType::Type::kInt32:
2339 case DataType::Type::kUint32:
2340 __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2341 break;
2342 case DataType::Type::kInt64:
2343 case DataType::Type::kUint64:
2344 __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2345 break;
2346 default:
2347 LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2348 }
2349 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2350
2351 if (byte_swap) {
2352 // Restore byte order for value.
2353 instr_codegen->Bswap(value, type);
2354 }
2355
2356 CpuRegister rax(RAX);
2357 if (is_cmpxchg) {
2358 if (byte_swap) {
2359 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2360 }
2361 // Sign-extend or zero-extend the result as necessary.
2362 switch (type) {
2363 case DataType::Type::kBool:
2364 __ movzxb(rax, rax);
2365 break;
2366 case DataType::Type::kInt8:
2367 __ movsxb(rax, rax);
2368 break;
2369 case DataType::Type::kInt16:
2370 __ movsxw(rax, rax);
2371 break;
2372 case DataType::Type::kUint16:
2373 __ movzxw(rax, rax);
2374 break;
2375 default:
2376 break; // No need to do anything.
2377 }
2378 } else {
2379 GenZFlagToResult(assembler, rax);
2380 }
2381 }
2382
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2383 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2384 Address field_addr,
2385 CpuRegister temp,
2386 Location value,
2387 Location expected,
2388 Location out,
2389 bool is64bit,
2390 bool is_cmpxchg,
2391 bool byte_swap) {
2392 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2393 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2394
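// The FP compare-and-set/exchange works on the raw bit pattern: `expected` and `value`
// are moved into general-purpose registers so that an integer LOCK CMPXCHG can be used,
// and for the exchange variant the old bits are moved back into the XMM output below.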
2395 Location rax_loc = Location::RegisterLocation(RAX);
2396 Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2397
2398 DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2399
2400 // Copy `expected` to RAX (required by the CMPXCHG instruction).
2401 codegen->Move(rax_loc, expected);
2402
2403 // Copy value to some other register (ensure it's not RAX).
2404 DCHECK_NE(temp.AsRegister(), RAX);
2405 codegen->Move(temp_loc, value);
2406
2407 if (byte_swap) {
2408 instr_codegen->Bswap(rax_loc, type);
2409 instr_codegen->Bswap(temp_loc, type);
2410 }
2411
2412 if (is64bit) {
2413 __ LockCmpxchgq(field_addr, temp);
2414 } else {
2415 __ LockCmpxchgl(field_addr, temp);
2416 }
2417 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2418 // No need to restore byte order for temporary register.
2419
2420 if (is_cmpxchg) {
2421 if (byte_swap) {
2422 instr_codegen->Bswap(rax_loc, type);
2423 }
2424 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
2425 } else {
2426 GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2427 }
2428 }
2429
2430 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
2431 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2432 HInvoke* invoke,
2433 CpuRegister base,
2434 CpuRegister offset,
2435 CpuRegister value,
2436 CpuRegister temp1,
2437 CpuRegister temp2,
2438 CpuRegister temp3,
2439 bool is_cmpxchg) {
2440 // The only supported read barrier implementation is the Baker-style read barriers.
2441 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2442
2443 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2444
2445 // Mark card for object assuming new value is stored.
2446 bool value_can_be_null = true; // TODO: Worth finding out this information?
2447 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2448
2449 Address field_addr(base, offset, TIMES_1, 0);
2450 if (gUseReadBarrier && kUseBakerReadBarrier) {
2451 // Need to make sure the reference stored in the field is a to-space
2452 // one before attempting the CAS or the CAS could fail incorrectly.
2453 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2454 invoke,
2455 Location::RegisterLocation(temp3.AsRegister()),
2456 base,
2457 field_addr,
2458 /* needs_null_check= */ false,
2459 /* always_update_field= */ true,
2460 &temp1,
2461 &temp2);
2462 } else {
2463 // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2464 }
2465
2466 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2467 Register value_reg = value.AsRegister();
2468 if (kPoisonHeapReferences) {
2469 if (base_equals_value) {
2470 // If `base` and `value` are the same register location, move `value_reg` to a temporary
2471 // register. This way, poisoning `value_reg` won't invalidate `base`.
2472 value_reg = temp1.AsRegister();
2473 __ movl(CpuRegister(value_reg), base);
2474 }
2475
2476 // Check that the register allocator did not assign the location of expected value (RAX) to
2477 // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2478 // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2479 // not be poisoned at all, as heap poisoning uses address negation.
2480 // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2481 DCHECK_NE(RAX, value_reg);
2482 DCHECK_NE(RAX, base.AsRegister());
2483
2484 __ PoisonHeapReference(CpuRegister(RAX));
2485 __ PoisonHeapReference(CpuRegister(value_reg));
2486 }
2487
2488 __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2489 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2490
2491 if (is_cmpxchg) {
2492 // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2493 __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2494 } else {
2495 GenZFlagToResult(assembler, CpuRegister(RAX));
2496 }
2497
2498 // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2499 if (kPoisonHeapReferences) {
2500 if (base_equals_value) {
2501 // `value_reg` has been moved to a temporary register, no need to unpoison it.
2502 } else {
2503 // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2504 DCHECK_NE(RAX, value_reg);
2505 __ UnpoisonHeapReference(CpuRegister(value_reg));
2506 }
2507 }
2508 }
2509
2510 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2511 // and always return true.
2512 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2513 if (kIsDebugBuild) {
2514 for (size_t i = 0; i < regs.size(); ++i) {
2515 for (size_t j = 0; j < i; ++j) {
2516 if (regs[i].AsRegister() == regs[j].AsRegister()) {
2517 return false;
2518 }
2519 }
2520 }
2521 }
2522 return true;
2523 }
2524
2525 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2526 // temporary indices that may not correspond to real registers for code paths that do not use them.
2527 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2528 HInvoke* invoke,
2529 DataType::Type type,
2530 CpuRegister base,
2531 CpuRegister offset,
2532 uint32_t temp1_index,
2533 uint32_t temp2_index,
2534 uint32_t temp3_index,
2535 Location new_value,
2536 Location expected,
2537 Location out,
2538 bool is_cmpxchg,
2539 bool byte_swap) {
2540 LocationSummary* locations = invoke->GetLocations();
2541 Address field_address(base, offset, TIMES_1, 0);
2542
2543 if (DataType::IsFloatingPointType(type)) {
2544 bool is64bit = (type == DataType::Type::kFloat64);
2545 CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2546 DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2547
2548 GenCompareAndSetOrExchangeFP(
2549 codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2550 } else {
2551 // Both the expected value for CMPXCHG and the output are in RAX.
2552 DCHECK_EQ(RAX, expected.AsRegister<Register>());
2553 DCHECK_EQ(RAX, out.AsRegister<Register>());
2554
2555 if (type == DataType::Type::kReference) {
2556 CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2557 CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2558 CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2559 CpuRegister temp3 = gUseReadBarrier
2560 ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2561 : CpuRegister(kNoRegister);
2562 DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2563
2564 DCHECK(!byte_swap);
2565 GenCompareAndSetOrExchangeRef(
2566 codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2567 } else {
2568 GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2569 }
2570 }
2571 }
2572
2573 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2574 LocationSummary* locations = invoke->GetLocations();
2575 GenCompareAndSetOrExchange(codegen,
2576 invoke,
2577 type,
2578 /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2579 /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2580 /*temp1_index=*/ 0,
2581 /*temp2_index=*/ 1,
2582 /*temp3_index=*/ 2,
2583 /*new_value=*/ locations->InAt(4),
2584 /*expected=*/ locations->InAt(3),
2585 locations->Out(),
2586 /*is_cmpxchg=*/ false,
2587 /*byte_swap=*/ false);
2588 }
2589
2590 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2591 VisitJdkUnsafeCASInt(invoke);
2592 }
2593
2594 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2595 VisitJdkUnsafeCASLong(invoke);
2596 }
2597
2598 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2599 VisitJdkUnsafeCASObject(invoke);
2600 }
2601
2602 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2603 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2604 VisitJdkUnsafeCompareAndSetInt(invoke);
2605 }
2606
2607 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2608 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2609 VisitJdkUnsafeCompareAndSetLong(invoke);
2610 }
2611
2612 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2613 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2614 VisitJdkUnsafeCompareAndSetObject(invoke);
2615 }
2616
2617 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2618 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2619 }
2620
2621 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2622 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2623 }
2624
2625 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2626 // The only supported read barrier implementation is the Baker-style read barriers.
2627 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2628
2629 GenCAS(DataType::Type::kReference, invoke, codegen_);
2630 }
2631
2632 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2633 LocationSummary* locations =
2634 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2635 locations->SetInAt(0, Location::RequiresRegister());
2636 locations->SetOut(Location::SameAsFirstInput());
2637 locations->AddTemp(Location::RequiresRegister());
2638 }
2639
2640 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2641 X86_64Assembler* assembler) {
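// One reversal round: reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swap
// adjacent groups of `shift` bits selected by `mask`.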
2642 Immediate imm_shift(shift);
2643 Immediate imm_mask(mask);
2644 __ movl(temp, reg);
2645 __ shrl(reg, imm_shift);
2646 __ andl(temp, imm_mask);
2647 __ andl(reg, imm_mask);
2648 __ shll(temp, imm_shift);
2649 __ orl(reg, temp);
2650 }
2651
2652 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2653 X86_64Assembler* assembler = GetAssembler();
2654 LocationSummary* locations = invoke->GetLocations();
2655
2656 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2657 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2658
2659 /*
2660 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2661 * swapping bits to reverse bits in a number x. Using bswap saves instructions
2662 * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2663 * x = bswap x
2664 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2665 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2666 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2667 */
2668 __ bswapl(reg);
2669 SwapBits(reg, temp, 1, 0x55555555, assembler);
2670 SwapBits(reg, temp, 2, 0x33333333, assembler);
2671 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2672 }
2673
2674 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2675 LocationSummary* locations =
2676 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2677 locations->SetInAt(0, Location::RequiresRegister());
2678 locations->SetOut(Location::SameAsFirstInput());
2679 locations->AddTemp(Location::RequiresRegister());
2680 locations->AddTemp(Location::RequiresRegister());
2681 }
2682
2683 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2684 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
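// 64-bit counterpart of SwapBits: reg = ((reg >> shift) & mask) | ((reg & mask) << shift).
// The 64-bit mask does not fit in an immediate, so it is materialized in `temp_mask` first.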
2685 Immediate imm_shift(shift);
2686 __ movq(temp_mask, Immediate(mask));
2687 __ movq(temp, reg);
2688 __ shrq(reg, imm_shift);
2689 __ andq(temp, temp_mask);
2690 __ andq(reg, temp_mask);
2691 __ shlq(temp, imm_shift);
2692 __ orq(reg, temp);
2693 }
2694
2695 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2696 X86_64Assembler* assembler = GetAssembler();
2697 LocationSummary* locations = invoke->GetLocations();
2698
2699 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2700 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2701 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2702
2703 /*
2704 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2705 * swapping bits to reverse bits in a long number x. Using bswap saves instructions
2706 * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2707 * x = bswap x
2708 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2709 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2710 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2711 */
2712 __ bswapq(reg);
2713 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2714 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2715 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2716 }
2717
2718 static void CreateBitCountLocations(
2719 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2720 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2721 // Do nothing if there is no popcnt support. This results in generating
2722 // a call for the intrinsic rather than direct code.
2723 return;
2724 }
2725 LocationSummary* locations =
2726 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2727 locations->SetInAt(0, Location::Any());
2728 locations->SetOut(Location::RequiresRegister());
2729 }
2730
2731 static void GenBitCount(X86_64Assembler* assembler,
2732 CodeGeneratorX86_64* codegen,
2733 HInvoke* invoke,
2734 bool is_long) {
2735 LocationSummary* locations = invoke->GetLocations();
2736 Location src = locations->InAt(0);
2737 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2738
2739 if (invoke->InputAt(0)->IsConstant()) {
2740 // Evaluate this at compile time.
2741 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2742 int32_t result = is_long
2743 ? POPCOUNT(static_cast<uint64_t>(value))
2744 : POPCOUNT(static_cast<uint32_t>(value));
2745 codegen->Load32BitValue(out, result);
2746 return;
2747 }
2748
2749 if (src.IsRegister()) {
2750 if (is_long) {
2751 __ popcntq(out, src.AsRegister<CpuRegister>());
2752 } else {
2753 __ popcntl(out, src.AsRegister<CpuRegister>());
2754 }
2755 } else if (is_long) {
2756 DCHECK(src.IsDoubleStackSlot());
2757 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2758 } else {
2759 DCHECK(src.IsStackSlot());
2760 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2761 }
2762 }
2763
2764 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2765 CreateBitCountLocations(allocator_, codegen_, invoke);
2766 }
2767
2768 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2769 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2770 }
2771
2772 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2773 CreateBitCountLocations(allocator_, codegen_, invoke);
2774 }
2775
2776 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2777 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2778 }
2779
2780 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
2781 LocationSummary* locations =
2782 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2783 locations->SetInAt(0, Location::Any());
2784 locations->SetOut(Location::RequiresRegister());
2785 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
2786 : Location::RequiresRegister()); // any will do
2787 }
2788
2789 static void GenOneBit(X86_64Assembler* assembler,
2790 CodeGeneratorX86_64* codegen,
2791 HInvoke* invoke,
2792 bool is_high, bool is_long) {
2793 LocationSummary* locations = invoke->GetLocations();
2794 Location src = locations->InAt(0);
2795 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2796
2797 if (invoke->InputAt(0)->IsConstant()) {
2798 // Evaluate this at compile time.
2799 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2800 if (value == 0) {
2801 __ xorl(out, out); // Clears upper bits too.
2802 return;
2803 }
2804 // Nonzero value.
2805 if (is_high) {
2806 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2807 : 31 - CLZ(static_cast<uint32_t>(value));
2808 } else {
2809 value = is_long ? CTZ(static_cast<uint64_t>(value))
2810 : CTZ(static_cast<uint32_t>(value));
2811 }
2812 if (is_long) {
2813 codegen->Load64BitValue(out, 1ULL << value);
2814 } else {
2815 codegen->Load32BitValue(out, 1 << value);
2816 }
2817 return;
2818 }
2819
2820 // Handle the non-constant cases.
2821 if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
2822 src.IsRegister()) {
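// BLSI isolates the lowest set bit in a single instruction: out = src & -src.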
2823 __ blsi(out, src.AsRegister<CpuRegister>());
2824 } else {
2825 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2826 if (is_high) {
2827 // Use architectural support: basically 1 << bsr.
2828 if (src.IsRegister()) {
2829 if (is_long) {
2830 __ bsrq(tmp, src.AsRegister<CpuRegister>());
2831 } else {
2832 __ bsrl(tmp, src.AsRegister<CpuRegister>());
2833 }
2834 } else if (is_long) {
2835 DCHECK(src.IsDoubleStackSlot());
2836 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2837 } else {
2838 DCHECK(src.IsStackSlot());
2839 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2840 }
2841 // BSR sets ZF if the input was zero.
2842 NearLabel is_zero, done;
2843 __ j(kEqual, &is_zero);
2844 __ movl(out, Immediate(1)); // Clears upper bits too.
2845 if (is_long) {
2846 __ shlq(out, tmp);
2847 } else {
2848 __ shll(out, tmp);
2849 }
2850 __ jmp(&done);
2851 __ Bind(&is_zero);
2852 __ xorl(out, out); // Clears upper bits too.
2853 __ Bind(&done);
2854 } else {
2855 // Copy input into temporary.
2856 if (src.IsRegister()) {
2857 if (is_long) {
2858 __ movq(tmp, src.AsRegister<CpuRegister>());
2859 } else {
2860 __ movl(tmp, src.AsRegister<CpuRegister>());
2861 }
2862 } else if (is_long) {
2863 DCHECK(src.IsDoubleStackSlot());
2864 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2865 } else {
2866 DCHECK(src.IsStackSlot());
2867 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2868 }
2869 // Do the bit twiddling: basically tmp & -tmp;
2870 if (is_long) {
2871 __ movq(out, tmp);
2872 __ negq(tmp);
2873 __ andq(out, tmp);
2874 } else {
2875 __ movl(out, tmp);
2876 __ negl(tmp);
2877 __ andl(out, tmp);
2878 }
2879 }
2880 }
2881 }
2882
2883 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2884 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2885 }
2886
2887 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2888 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
2889 }
2890
2891 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2892 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2893 }
2894
2895 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2896 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
2897 }
2898
2899 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2900 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2901 }
2902
2903 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2904 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
2905 }
2906
2907 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2908 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2909 }
2910
2911 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2912 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
2913 }
2914
2915 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2916 LocationSummary* locations =
2917 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2918 locations->SetInAt(0, Location::Any());
2919 locations->SetOut(Location::RequiresRegister());
2920 }
2921
2922 static void GenLeadingZeros(X86_64Assembler* assembler,
2923 CodeGeneratorX86_64* codegen,
2924 HInvoke* invoke, bool is_long) {
2925 LocationSummary* locations = invoke->GetLocations();
2926 Location src = locations->InAt(0);
2927 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2928
2929 int zero_value_result = is_long ? 64 : 32;
2930 if (invoke->InputAt(0)->IsConstant()) {
2931 // Evaluate this at compile time.
2932 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2933 if (value == 0) {
2934 value = zero_value_result;
2935 } else {
2936 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2937 }
2938 codegen->Load32BitValue(out, value);
2939 return;
2940 }
2941
2942 // Handle the non-constant cases.
2943 if (src.IsRegister()) {
2944 if (is_long) {
2945 __ bsrq(out, src.AsRegister<CpuRegister>());
2946 } else {
2947 __ bsrl(out, src.AsRegister<CpuRegister>());
2948 }
2949 } else if (is_long) {
2950 DCHECK(src.IsDoubleStackSlot());
2951 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2952 } else {
2953 DCHECK(src.IsStackSlot());
2954 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2955 }
2956
2957 // BSR sets ZF if the input was zero, and the output is undefined.
2958 NearLabel is_zero, done;
2959 __ j(kEqual, &is_zero);
2960
2961 // Correct the result from BSR to get the CLZ result.
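// For nonzero input, CLZ(x) = (width - 1) - BSR(x); since BSR(x) <= width - 1, XOR with
// (width - 1) computes exactly that subtraction.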
2962 __ xorl(out, Immediate(zero_value_result - 1));
2963 __ jmp(&done);
2964
2965 // Fix the zero case with the expected result.
2966 __ Bind(&is_zero);
2967 __ movl(out, Immediate(zero_value_result));
2968
2969 __ Bind(&done);
2970 }
2971
2972 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2973 CreateLeadingZeroLocations(allocator_, invoke);
2974 }
2975
2976 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2977 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2978 }
2979
2980 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2981 CreateLeadingZeroLocations(allocator_, invoke);
2982 }
2983
2984 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2985 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2986 }
2987
2988 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2989 LocationSummary* locations =
2990 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2991 locations->SetInAt(0, Location::Any());
2992 locations->SetOut(Location::RequiresRegister());
2993 }
2994
2995 static void GenTrailingZeros(X86_64Assembler* assembler,
2996 CodeGeneratorX86_64* codegen,
2997 HInvoke* invoke, bool is_long) {
2998 LocationSummary* locations = invoke->GetLocations();
2999 Location src = locations->InAt(0);
3000 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3001
3002 int zero_value_result = is_long ? 64 : 32;
3003 if (invoke->InputAt(0)->IsConstant()) {
3004 // Evaluate this at compile time.
3005 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3006 if (value == 0) {
3007 value = zero_value_result;
3008 } else {
3009 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3010 }
3011 codegen->Load32BitValue(out, value);
3012 return;
3013 }
3014
3015 // Handle the non-constant cases.
3016 if (src.IsRegister()) {
3017 if (is_long) {
3018 __ bsfq(out, src.AsRegister<CpuRegister>());
3019 } else {
3020 __ bsfl(out, src.AsRegister<CpuRegister>());
3021 }
3022 } else if (is_long) {
3023 DCHECK(src.IsDoubleStackSlot());
3024 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3025 } else {
3026 DCHECK(src.IsStackSlot());
3027 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3028 }
3029
3030 // BSF sets ZF if the input was zero, and the output is undefined.
3031 NearLabel done;
3032 __ j(kNotEqual, &done);
3033
3034 // Fix the zero case with the expected result.
3035 __ movl(out, Immediate(zero_value_result));
3036
3037 __ Bind(&done);
3038 }
3039
3040 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3041 CreateTrailingZeroLocations(allocator_, invoke);
3042 }
3043
3044 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3045 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3046 }
3047
3048 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3049 CreateTrailingZeroLocations(allocator_, invoke);
3050 }
3051
3052 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3053 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3054 }
3055
3056 void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
3057 InvokeRuntimeCallingConvention calling_convention;
3058 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3059 invoke,
3060 codegen_,
3061 Location::RegisterLocation(RAX),
3062 Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3063 }
3064
3065 void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
3066 IntrinsicVisitor::IntegerValueOfInfo info =
3067 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3068 LocationSummary* locations = invoke->GetLocations();
3069 X86_64Assembler* assembler = GetAssembler();
3070
3071 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3072 InvokeRuntimeCallingConvention calling_convention;
3073 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3074 auto allocate_instance = [&]() {
3075 codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3076 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3077 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3078 };
3079 if (invoke->InputAt(0)->IsIntConstant()) {
3080 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3081 if (static_cast<uint32_t>(value - info.low) < info.length) {
3082 // Just embed the j.l.Integer in the code.
3083 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3084 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3085 } else {
3086 DCHECK(locations->CanCall());
3087 // Allocate and initialize a new j.l.Integer.
3088 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3089 // JIT object table.
3090 allocate_instance();
3091 __ movl(Address(out, info.value_offset), Immediate(value));
3092 }
3093 } else {
3094 DCHECK(locations->CanCall());
3095 CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3096 // Check bounds of our cache.
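// LEA computes (value - low); a single unsigned comparison against the cache length then
// rejects both values below `low` and values at or above `low + length`.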
3097 __ leal(out, Address(in, -info.low));
3098 __ cmpl(out, Immediate(info.length));
3099 NearLabel allocate, done;
3100 __ j(kAboveEqual, &allocate);
3101 // If the value is within the bounds, load the j.l.Integer directly from the array.
3102 DCHECK_NE(out.AsRegister(), argument.AsRegister());
3103 codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3104 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3105 "Check heap reference size.");
3106 __ movl(out, Address(argument, out, TIMES_4, 0));
3107 __ MaybeUnpoisonHeapReference(out);
3108 __ jmp(&done);
3109 __ Bind(&allocate);
3110 // Otherwise allocate and initialize a new j.l.Integer.
3111 allocate_instance();
3112 __ movl(Address(out, info.value_offset), in);
3113 __ Bind(&done);
3114 }
3115 }
3116
3117 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3118 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3119 }
3120
3121 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3122 X86_64Assembler* assembler = GetAssembler();
3123 LocationSummary* locations = invoke->GetLocations();
3124
3125 Location obj = locations->InAt(0);
3126 Location out = locations->Out();
3127
3128 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3129 codegen_->AddSlowPath(slow_path);
3130
3131 if (gUseReadBarrier) {
3132 // Check self->GetWeakRefAccessEnabled().
3133 ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3134 __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3135 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3136 __ j(kNotEqual, slow_path->GetEntryLabel());
3137 }
3138
3139 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3140 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3141
3142 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3143 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3144 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3145 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3146 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3147 __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3148 Immediate(0));
3149 __ j(kNotEqual, slow_path->GetEntryLabel());
3150
3151 // Load the value from the field.
3152 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3153 if (gUseReadBarrier && kUseBakerReadBarrier) {
3154 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3155 out,
3156 obj.AsRegister<CpuRegister>(),
3157 referent_offset,
3158 /*needs_null_check=*/ true);
3159 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3160 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3161 } else {
3162 __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3163 codegen_->MaybeRecordImplicitNullCheck(invoke);
3164 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3165 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3166 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3167 }
3168 __ Bind(slow_path->GetExitLabel());
3169 }
3170
3171 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3172 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3173 }
3174
3175 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3176 X86_64Assembler* assembler = GetAssembler();
3177 LocationSummary* locations = invoke->GetLocations();
3178
3179 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3180 CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3181 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3182
3183 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3184 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3185
3186 __ movl(out, Address(obj, referent_offset));
3187 codegen_->MaybeRecordImplicitNullCheck(invoke);
3188 __ MaybeUnpoisonHeapReference(out);
3189 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3190 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3191
3192 __ cmpl(out, other);
3193
3194 if (gUseReadBarrier) {
3195 DCHECK(kUseBakerReadBarrier);
3196
3197 NearLabel calculate_result;
3198 __ j(kEqual, &calculate_result); // ZF set if taken.
3199
3200 // Check if the loaded reference is null in a way that leaves ZF clear for null.
3201 __ cmpl(out, Immediate(1));
3202 __ j(kBelow, &calculate_result); // ZF clear if taken.
3203
3204 // For correct memory visibility, we need a barrier before loading the lock word
3205 // but the barrier already emitted for the volatile load above is sufficient.
3206
3207 // Load the lockword and check if it is a forwarding address.
3208 static_assert(LockWord::kStateShift == 30u);
3209 static_assert(LockWord::kStateForwardingAddress == 3u);
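// With the state in bits 31:30 and kStateForwardingAddress == 3, the lock word holds a
// forwarding address iff it is unsigned greater than or equal to 0xc0000000.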
3210 __ movl(out, Address(out, monitor_offset));
3211 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3212 __ j(kBelow, &calculate_result); // ZF clear if taken.
3213
3214 // Extract the forwarding address and compare with `other`.
3215 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3216 __ cmpl(out, other);
3217
3218 __ Bind(&calculate_result);
3219 }
3220
3221 // Convert ZF into the Boolean result.
3222 __ setcc(kEqual, out);
3223 __ movzxb(out, out);
3224 }
3225
3226 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3227 LocationSummary* locations =
3228 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3229 locations->SetOut(Location::RequiresRegister());
3230 }
3231
3232 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3233 X86_64Assembler* assembler = GetAssembler();
3234 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3235 Address address = Address::Absolute(
3236 Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3237 NearLabel done;
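// Load the thread's interrupted flag via the GS-based thread-local address. If it was set,
// clear it and emit a fence so the clearing store is ordered before later accesses.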
3238 __ gs()->movl(out, address);
3239 __ testl(out, out);
3240 __ j(kEqual, &done);
3241 __ gs()->movl(address, Immediate(0));
3242 codegen_->MemoryFence();
3243 __ Bind(&done);
3244 }
3245
3246 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3247 LocationSummary* locations =
3248 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3249 locations->SetInAt(0, Location::Any());
3250 }
3251
3252 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3253
3254 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3255 LocationSummary* locations =
3256 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3257 locations->SetInAt(0, Location::RegisterLocation(RAX));
3258 locations->SetInAt(1, Location::RequiresRegister());
3259 locations->SetOut(Location::SameAsFirstInput());
3260 // Intel uses edx:eax as the dividend.
3261 locations->AddTemp(Location::RegisterLocation(RDX));
3262 }
3263
3264 static void GenerateDivideUnsigned(HInvoke* invoke,
3265 CodeGeneratorX86_64* codegen,
3266 DataType::Type data_type) {
3267 LocationSummary* locations = invoke->GetLocations();
3268 Location out = locations->Out();
3269 Location first = locations->InAt(0);
3270 Location second = locations->InAt(1);
3271 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3272 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3273
3274 DCHECK_EQ(RAX, first.AsRegister<Register>());
3275 DCHECK_EQ(RAX, out.AsRegister<Register>());
3276 DCHECK_EQ(RDX, rdx.AsRegister());
3277
3278 // Check if the divisor is zero and bail out to the slow path if so.
3279 auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3280 codegen->AddSlowPath(slow_path);
3281
3282 X86_64Assembler* assembler = codegen->GetAssembler();
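// DIV divides the double-width value RDX:RAX by the divisor; zeroing RDX first turns this
// into an unsigned division of the 32-bit (or 64-bit) dividend held in RAX.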
3283 if (data_type == DataType::Type::kInt32) {
3284 __ testl(second_reg, second_reg);
3285 __ j(kEqual, slow_path->GetEntryLabel());
3286 __ xorl(rdx, rdx);
3287 __ divl(second_reg);
3288 } else {
3289 DCHECK(data_type == DataType::Type::kInt64);
3290 __ testq(second_reg, second_reg);
3291 __ j(kEqual, slow_path->GetEntryLabel());
3292 __ xorq(rdx, rdx);
3293 __ divq(second_reg);
3294 }
3295 __ Bind(slow_path->GetExitLabel());
3296 }
3297
3298 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3299 CreateDivideUnsignedLocations(invoke, allocator_);
3300 }
3301
3302 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3303 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3304 }
3305
3306 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3307 CreateDivideUnsignedLocations(invoke, allocator_);
3308 }
3309
3310 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3311 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3312 }
3313
3314 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3315 LocationSummary* locations =
3316 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3317 locations->SetInAt(0, Location::RegisterLocation(RAX));
3318 locations->SetInAt(1, Location::RequiresRegister());
3319 locations->SetOut(Location::RegisterLocation(RDX));
3320 locations->AddTemp(Location::RegisterLocation(RAX));
3321 }
3322
3323 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3324 X86_64Assembler* assembler = GetAssembler();
3325 LocationSummary* locations = invoke->GetLocations();
3326
3327 CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3328
3329 DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3330 DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3331
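// One-operand IMUL computes the full product RDX:RAX = RAX * y, so RDX (the output) receives
// the high 64 bits, i.e. roughly (int64_t) (((__int128) x * y) >> 64).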
3332 __ imulq(y);
3333 }
3334
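// Update operations implemented by the get-and-update family of Unsafe/VarHandle intrinsics.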
3335 enum class GetAndUpdateOp {
3336 kSet,
3337 kAdd,
3338 kBitwiseAnd,
3339 kBitwiseOr,
3340 kBitwiseXor
3341 };
3342
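// Slow path shared by the VarHandle intrinsics. Besides the generic runtime call, it can emit
// the byte-array-view access code (reached via GetByteArrayViewCheckLabel()), parameterized by
// the flags recorded through the setters below.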
3343 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3344 public:
3345 explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3346 : IntrinsicSlowPathX86_64(invoke) {
3347 }
3348
3349 void SetVolatile(bool is_volatile) {
3350 is_volatile_ = is_volatile;
3351 }
3352
3353 void SetAtomic(bool is_atomic) {
3354 is_atomic_ = is_atomic;
3355 }
3356
3357 void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3358 need_any_store_barrier_ = need_any_store_barrier;
3359 }
3360
3361 void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3362 need_any_any_barrier_ = need_any_any_barrier;
3363 }
3364
3365 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3366 get_and_update_op_ = get_and_update_op;
3367 }
3368
3369 Label* GetByteArrayViewCheckLabel() {
3370 return &byte_array_view_check_label_;
3371 }
3372
3373 Label* GetNativeByteOrderLabel() {
3374 return &native_byte_order_label_;
3375 }
3376
3377 void EmitNativeCode(CodeGenerator* codegen) override {
3378 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3379 EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3380 }
3381 IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3382 }
3383
3384 private:
3385 HInvoke* GetInvoke() const {
3386 return GetInstruction()->AsInvoke();
3387 }
3388
3389 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3390 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3391 }
3392
3393 void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3394
3395 Label byte_array_view_check_label_;
3396 Label native_byte_order_label_;
3397
3398 // Arguments forwarded to specific methods.
3399 bool is_volatile_;
3400 bool is_atomic_;
3401 bool need_any_store_barrier_;
3402 bool need_any_any_barrier_;
3403 GetAndUpdateOp get_and_update_op_;
3404 };
3405
3406 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3407 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3408 X86_64Assembler* assembler = codegen->GetAssembler();
3409 LocationSummary* locations = invoke->GetLocations();
3410 DCHECK(locations->InAt(0).Equals(locations->Out()));
3411 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3412 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3413 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
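// VFMADD213 computes left = left * right + accumulator with a single rounding step.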
3414 if (invoke->GetType() == DataType::Type::kFloat32) {
3415 __ vfmadd213ss(left, right, accumulator);
3416 } else {
3417 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3418 __ vfmadd213sd(left, right, accumulator);
3419 }
3420 }
3421
3422 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3423 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3424 GenerateMathFma(invoke, codegen_);
3425 }
3426
3427 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3428 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3429 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3430 }
3431 }
3432
3433 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3434 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3435 GenerateMathFma(invoke, codegen_);
3436 }
3437
3438 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3439 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3440 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3441 }
3442 }
3443
3444 // Generate subtype check without read barriers.
3445 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3446 VarHandleSlowPathX86_64* slow_path,
3447 CpuRegister object,
3448 CpuRegister temp,
3449 Address type_address,
3450 bool object_can_be_null = true) {
3451 X86_64Assembler* assembler = codegen->GetAssembler();
3452
3453 const MemberOffset class_offset = mirror::Object::ClassOffset();
3454 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3455
3456 NearLabel check_type_compatibility, type_matched;
3457
3458 // If the object is null, there is no need to check the type.
3459 if (object_can_be_null) {
3460 __ testl(object, object);
3461 __ j(kZero, &type_matched);
3462 }
3463
3464 // Do not unpoison for in-memory comparison.
3465 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3466 __ movl(temp, Address(object, class_offset));
3467 __ Bind(&check_type_compatibility);
3468 __ cmpl(temp, type_address);
3469 __ j(kEqual, &type_matched);
3470 // Load the super class.
3471 __ MaybeUnpoisonHeapReference(temp);
3472 __ movl(temp, Address(temp, super_class_offset));
3473 // If the super class is null, we reached the root of the hierarchy without a match.
3474 // We let the slow path handle uncovered cases (e.g. interfaces).
3475 __ testl(temp, temp);
3476 __ j(kEqual, slow_path->GetEntryLabel());
3477 __ jmp(&check_type_compatibility);
3478 __ Bind(&type_matched);
3479 }
3480
3481 // Check access mode and the primitive type from VarHandle.varType.
3482 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3483 // check without read barrier, so it can have false negatives which we handle in the slow path.
3484 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3485 CodeGeneratorX86_64* codegen,
3486 VarHandleSlowPathX86_64* slow_path,
3487 DataType::Type type) {
3488 X86_64Assembler* assembler = codegen->GetAssembler();
3489
3490 LocationSummary* locations = invoke->GetLocations();
3491 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3492 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3493
3494 mirror::VarHandle::AccessMode access_mode =
3495 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3496 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3497
3498 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3499 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3500 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3501
3502 // Check that the operation is permitted.
3503 __ testl(Address(varhandle, access_mode_bit_mask_offset),
3504 Immediate(1u << static_cast<uint32_t>(access_mode)));
3505 __ j(kZero, slow_path->GetEntryLabel());
3506
3507 // For primitive types, we do not need a read barrier when loading the varType reference, since
3508 // it is used only to load a constant field through that reference. For reference types, we
3509 // deliberately avoid the read barrier, letting the slow path handle the false negatives.
3510 __ movl(temp, Address(varhandle, var_type_offset));
3511 __ MaybeUnpoisonHeapReference(temp);
3512
3513 // Check the varType.primitiveType field against the type we're trying to retrieve.
3514 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3515 __ j(kNotEqual, slow_path->GetEntryLabel());
3516
3517 if (type == DataType::Type::kReference) {
3518 // Check reference arguments against the varType.
3519 // False negatives due to varType being an interface or array type
3520 // or due to the missing read barrier are handled by the slow path.
3521 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3522 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3523 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3524 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3525 HInstruction* arg = invoke->InputAt(arg_index);
3526 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3527 if (!arg->IsNullConstant()) {
3528 CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3529 Address type_addr(varhandle, var_type_offset);
3530 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3531 }
3532 }
3533 }
3534 }
3535
3536 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3537 CodeGeneratorX86_64* codegen,
3538 VarHandleSlowPathX86_64* slow_path) {
3539 X86_64Assembler* assembler = codegen->GetAssembler();
3540
3541 LocationSummary* locations = invoke->GetLocations();
3542 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3543
3544 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3545
3546 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3547 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3548 __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3549 __ j(kNotEqual, slow_path->GetEntryLabel());
3550 }
3551
3552 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3553 CodeGeneratorX86_64* codegen,
3554 VarHandleSlowPathX86_64* slow_path) {
3555 VarHandleOptimizations optimizations(invoke);
3556 X86_64Assembler* assembler = codegen->GetAssembler();
3557
3558 LocationSummary* locations = invoke->GetLocations();
3559 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3560 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3561 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3562
3563 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3564 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3565
3566 // Null-check the object.
3567 if (!optimizations.GetSkipObjectNullCheck()) {
3568 __ testl(object, object);
3569 __ j(kZero, slow_path->GetEntryLabel());
3570 }
3571
3572 if (!optimizations.GetUseKnownBootImageVarHandle()) {
3573 // Check that the VarHandle references an instance field by checking that
3574 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3575 // type compatibility check with the source object's type, which will fail for null.
3576 __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3577 __ j(kNotEqual, slow_path->GetEntryLabel());
3578
3579 // Check that the object has the correct type.
3580 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3581 GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3582 slow_path,
3583 object,
3584 temp,
3585 Address(varhandle, coordinate_type0_offset),
3586 /*object_can_be_null=*/ false);
3587 }
3588 }
3589
3590 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3591 CodeGeneratorX86_64* codegen,
3592 VarHandleSlowPathX86_64* slow_path) {
3593 VarHandleOptimizations optimizations(invoke);
3594 X86_64Assembler* assembler = codegen->GetAssembler();
3595 LocationSummary* locations = invoke->GetLocations();
3596
3597 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3598 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3599 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3600 DataType::Type value_type =
3601 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3602 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3603
3604 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3605 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3606 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3607 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3608 const MemberOffset class_offset = mirror::Object::ClassOffset();
3609 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3610
3611 // Null-check the object.
3612 if (!optimizations.GetSkipObjectNullCheck()) {
3613 __ testl(object, object);
3614 __ j(kZero, slow_path->GetEntryLabel());
3615 }
3616
3617 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3618
3619 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3620 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3621 // coordinateType0 shall not be null but we do not explicitly verify that.
3622 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3623 __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3624 __ j(kEqual, slow_path->GetEntryLabel());
3625
3626 // Check the object's class against coordinateType0.
3627 //
3628 // This is an exact check and we defer other cases to the runtime. This includes
3629 // conversion to array of superclass references, which is valid but subsequently
3630 // requires all update operations to check that the value can indeed be stored.
3631 // We do not want to perform such extra checks in the intrinsified code.
3632 //
3633 // We do this check without read barrier, so there can be false negatives which we
3634 // defer to the slow path. There shall be no false negatives for array classes in the
3635 // boot image (including Object[] and primitive arrays) because they are non-movable.
3636 __ movl(temp, Address(object, class_offset.Int32Value()));
3637 __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3638 __ j(kNotEqual, slow_path->GetEntryLabel());
3639
3640 // Check that the coordinateType0 is an array type. We do not need a read barrier
3641 // for loading constant reference fields (or chains of them) for comparison with null,
3642 // nor for finally loading a constant primitive field (primitive type) below.
3643 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3644 __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3645 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3646 __ testl(temp, temp);
3647 __ j(kZero, slow_path->GetEntryLabel());
3648
3649 // Check that the array component type matches the primitive type.
3650 Label* slow_path_label;
3651 if (primitive_type == Primitive::kPrimNot) {
3652 slow_path_label = slow_path->GetEntryLabel();
3653 } else {
3654 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3655 // we shall check for a byte array view in the slow path.
3656 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3657 // so we cannot emit that if we're JITting without boot image.
3658 bool boot_image_available =
3659 codegen->GetCompilerOptions().IsBootImage() ||
3660 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3661 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3662 slow_path_label =
3663 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3664 }
3665 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3666 __ j(kNotEqual, slow_path_label);
3667
3668 // Check for array index out of bounds.
3669 __ cmpl(index, Address(object, array_length_offset.Int32Value()));
3670 __ j(kAboveEqual, slow_path->GetEntryLabel());
3671 }
3672
3673 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
3674 CodeGeneratorX86_64* codegen,
3675 VarHandleSlowPathX86_64* slow_path) {
3676 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3677 if (expected_coordinates_count == 0u) {
3678 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
3679 } else if (expected_coordinates_count == 1u) {
3680 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
3681 } else {
3682 DCHECK_EQ(expected_coordinates_count, 2u);
3683 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
3684 }
3685 }
3686
3687 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
3688 CodeGeneratorX86_64* codegen,
3689 DataType::Type type) {
3690 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3691 VarHandleOptimizations optimizations(invoke);
3692 if (optimizations.GetUseKnownBootImageVarHandle()) {
3693 DCHECK_NE(expected_coordinates_count, 2u);
3694 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
3695 return nullptr;
3696 }
3697 }
3698
3699 VarHandleSlowPathX86_64* slow_path =
3700 new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
3701 codegen->AddSlowPath(slow_path);
3702
3703 if (!optimizations.GetUseKnownBootImageVarHandle()) {
3704 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
3705 }
3706 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
3707
3708 return slow_path;
3709 }
3710
3711 struct VarHandleTarget {
3712 Register object; // The object holding the value to operate on.
3713 Register offset; // The offset of the value to operate on.
3714 };
3715
3716 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
3717 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3718 LocationSummary* locations = invoke->GetLocations();
3719
3720 VarHandleTarget target;
3721 // The temporary allocated for loading the offset.
3722 target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
3723 // The reference to the object that holds the value to operate on.
3724 target.object = (expected_coordinates_count == 0u)
3725 ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
3726 : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
3727 return target;
3728 }
3729
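// Materializes the `target` returned by GetVarHandleTarget(): loads the field offset (and, for
// static fields, the declaring class into `target.object`); for arrays and byte array views,
// computes `target.offset = data_offset + index * scale` with LEA. The value can then be
// accessed at `target.object + target.offset`.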
3730 static void GenerateVarHandleTarget(HInvoke* invoke,
3731 const VarHandleTarget& target,
3732 CodeGeneratorX86_64* codegen) {
3733 LocationSummary* locations = invoke->GetLocations();
3734 X86_64Assembler* assembler = codegen->GetAssembler();
3735 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3736
3737 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3738
3739 if (expected_coordinates_count <= 1u) {
3740 if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) {
3741 ScopedObjectAccess soa(Thread::Current());
3742 ArtField* target_field = GetBootImageVarHandleField(invoke);
3743 if (expected_coordinates_count == 0u) {
3744 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
3745 __ movl(CpuRegister(target.object),
3746 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
3747 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
3748 codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
3749 } else {
3750 codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
3751 declaring_class->GetDexTypeIndex());
3752 }
3753 }
3754 __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
3755 } else {
3756 // For static fields, we need to fill the `target.object` with the declaring class,
3757 // so we can use `target.object` as a temporary for the `ArtField*`. For instance fields,
3758 // we do not need the declaring class, so the `ArtField*` is no longer needed once
3759 // `target.offset` has been loaded; therefore use `target.offset` to hold the `ArtField*`.
3760 CpuRegister method((expected_coordinates_count == 0) ? target.object : target.offset);
3761
3762 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
3763 const MemberOffset offset_offset = ArtField::OffsetOffset();
3764
3765 // Load the ArtField, the offset and, if needed, declaring class.
3766 __ movq(method, Address(varhandle, art_field_offset));
3767 __ movl(CpuRegister(target.offset), Address(method, offset_offset));
3768 if (expected_coordinates_count == 0u) {
3769 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
3770 instr_codegen->GenerateGcRootFieldLoad(invoke,
3771 Location::RegisterLocation(target.object),
3772 Address(method, ArtField::DeclaringClassOffset()),
3773 /*fixup_label=*/ nullptr,
3774 gCompilerReadBarrierOption);
3775 }
3776 }
3777 } else {
3778 DCHECK_EQ(expected_coordinates_count, 2u);
3779
3780 DataType::Type value_type =
3781 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3782 ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
3783 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
3784 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3785
3786 // The effect of LEA is `target.offset = index * scale + data_offset`.
3787 __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
3788 }
3789 }
3790
3791 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3792 // The only supported read barrier implementation is the Baker-style read barriers.
3793 if (gUseReadBarrier && !kUseBakerReadBarrier) {
3794 return false;
3795 }
3796
3797 VarHandleOptimizations optimizations(invoke);
3798 if (optimizations.GetDoNotIntrinsify()) {
3799 return false;
3800 }
3801
3802 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3803 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
3804 return true;
3805 }
3806
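// Sets up the locations shared by all VarHandle intrinsics: the VarHandle reference and the
// coordinates are required in registers, the remaining arguments as registers or constants
// (FP arguments as FP registers or constants), plus a temporary for the field offset and,
// for static fields only, a second temporary for the declaring class.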
3807 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
3808 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3809 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3810 LocationSummary* locations = new (allocator) LocationSummary(
3811 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3812
3813 locations->SetInAt(0, Location::RequiresRegister());
3814 // Require coordinates in registers. These are the object holding the value
3815 // to operate on (except for static fields) and index (for arrays and views).
3816 for (size_t i = 0; i != expected_coordinates_count; ++i) {
3817 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
3818 }
3819
3820 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3821 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3822 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3823 HInstruction* arg = invoke->InputAt(arg_index);
3824 if (DataType::IsFloatingPointType(arg->GetType())) {
3825 locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
3826 } else {
3827 locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
3828 }
3829 }
3830
3831 // Add a temporary for offset.
3832 locations->AddTemp(Location::RequiresRegister());
3833
3834 if (expected_coordinates_count == 0u) {
3835 // Add a temporary to hold the declaring class.
3836 locations->AddTemp(Location::RequiresRegister());
3837 }
3838
3839 return locations;
3840 }
3841
3842 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3843 if (!HasVarHandleIntrinsicImplementation(invoke)) {
3844 return;
3845 }
3846
3847 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
3848 if (DataType::IsFloatingPointType(invoke->GetType())) {
3849 locations->SetOut(Location::RequiresFpuRegister());
3850 } else {
3851 locations->SetOut(Location::RequiresRegister());
3852 }
3853 }
3854
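// Fast path for VarHandle.get() and its acquire/opaque/volatile variants, e.g. (illustrative
// Java only, with a hypothetical field):
//   VarHandle vh = MethodHandles.lookup().findVarHandle(Foo.class, "bar", int.class);
//   int v = (int) vh.get(foo);  // Compiled down to this intrinsic when recognized.
// When `byte_swap` is true, this is invoked from the byte array view slow path with the target
// already resolved, so the checks (and the native byte order label) are skipped.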
3855 static void GenerateVarHandleGet(HInvoke* invoke,
3856 CodeGeneratorX86_64* codegen,
3857 bool byte_swap = false) {
3858 DataType::Type type = invoke->GetType();
3859 DCHECK_NE(type, DataType::Type::kVoid);
3860
3861 LocationSummary* locations = invoke->GetLocations();
3862 X86_64Assembler* assembler = codegen->GetAssembler();
3863
3864 VarHandleTarget target = GetVarHandleTarget(invoke);
3865 VarHandleSlowPathX86_64* slow_path = nullptr;
3866 if (!byte_swap) {
3867 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
3868 GenerateVarHandleTarget(invoke, target, codegen);
3869 if (slow_path != nullptr) {
3870 __ Bind(slow_path->GetNativeByteOrderLabel());
3871 }
3872 }
3873
3874 // Load the value from the field
3875 Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
3876 Location out = locations->Out();
3877
3878 if (type == DataType::Type::kReference) {
3879 if (gUseReadBarrier) {
3880 DCHECK(kUseBakerReadBarrier);
3881 codegen->GenerateReferenceLoadWithBakerReadBarrier(
3882 invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
3883 } else {
3884 __ movl(out.AsRegister<CpuRegister>(), src);
3885 __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
3886 }
3887 DCHECK(!byte_swap);
3888 } else {
3889 codegen->LoadFromMemoryNoReference(type, out, src);
3890 if (byte_swap) {
3891 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3892 codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
3893 }
3894 }
3895
3896 if (slow_path != nullptr) {
3897 DCHECK(!byte_swap);
3898 __ Bind(slow_path->GetExitLabel());
3899 }
3900 }
3901
3902 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
3903 CreateVarHandleGetLocations(invoke);
3904 }
3905
3906 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
3907 GenerateVarHandleGet(invoke, codegen_);
3908 }
3909
3910 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3911 CreateVarHandleGetLocations(invoke);
3912 }
3913
3914 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
3915 // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
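  // (Plain x86-64 loads already provide acquire semantics, so no extra fence is needed.)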
3916 GenerateVarHandleGet(invoke, codegen_);
3917 }
3918
3919 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3920 CreateVarHandleGetLocations(invoke);
3921 }
3922
3923 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
3924 // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
3925 GenerateVarHandleGet(invoke, codegen_);
3926 }
3927
3928 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3929 CreateVarHandleGetLocations(invoke);
3930 }
3931
3932 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
3933 // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
3934 GenerateVarHandleGet(invoke, codegen_);
3935 }
3936
3937 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3938 if (!HasVarHandleIntrinsicImplementation(invoke)) {
3939 return;
3940 }
3941
3942 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
3943
3944 // Extra temporary is used for card in MarkGCCard and to move 64-bit constants to memory.
3945 locations->AddTemp(Location::RequiresRegister());
3946 }
3947
3948 static void GenerateVarHandleSet(HInvoke* invoke,
3949 CodeGeneratorX86_64* codegen,
3950 bool is_volatile,
3951 bool is_atomic,
3952 bool byte_swap = false) {
3953 X86_64Assembler* assembler = codegen->GetAssembler();
3954
3955 LocationSummary* locations = invoke->GetLocations();
3956 const uint32_t last_temp_index = locations->GetTempCount() - 1;
3957
3958 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3959 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3960
3961 VarHandleTarget target = GetVarHandleTarget(invoke);
3962 VarHandleSlowPathX86_64* slow_path = nullptr;
3963 if (!byte_swap) {
3964 slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
3965 GenerateVarHandleTarget(invoke, target, codegen);
3966 if (slow_path != nullptr) {
3967 slow_path->SetVolatile(is_volatile);
3968 slow_path->SetAtomic(is_atomic);
3969 __ Bind(slow_path->GetNativeByteOrderLabel());
3970 }
3971 }
3972
3973 switch (invoke->GetIntrinsic()) {
3974 case Intrinsics::kVarHandleSetRelease:
3975 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
3976 break;
3977 case Intrinsics::kVarHandleSetVolatile:
3978 // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
3979 break;
3980 default:
3981 // Other intrinsics don't need a barrier.
3982 break;
3983 }
3984
3985 Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
3986
3987 // Store the value to the field.
3988 codegen->GetInstructionCodegen()->HandleFieldSet(
3989 invoke,
3990 value_index,
3991 last_temp_index,
3992 value_type,
3993 dst,
3994 CpuRegister(target.object),
3995 is_volatile,
3996 is_atomic,
3997 /*value_can_be_null=*/true,
3998 byte_swap,
3999 // Value can be null, and this write barrier is not being relied on for other sets.
4000 WriteBarrierKind::kEmitWithNullCheck);
4001
4002 // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4003
4004 if (slow_path != nullptr) {
4005 DCHECK(!byte_swap);
4006 __ Bind(slow_path->GetExitLabel());
4007 }
4008 }
4009
4010 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4011 CreateVarHandleSetLocations(invoke);
4012 }
4013
4014 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4015 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4016 }
4017
4018 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4019 CreateVarHandleSetLocations(invoke);
4020 }
4021
4022 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4023 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4024 }
4025
4026 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4027 CreateVarHandleSetLocations(invoke);
4028 }
4029
4030 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4031 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4032 }
4033
4034 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4035 CreateVarHandleSetLocations(invoke);
4036 }
4037
4038 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4039 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4040 }
4041
4042 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4043 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4044 return;
4045 }
4046
4047 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4048 uint32_t expected_value_index = number_of_arguments - 2;
4049 uint32_t new_value_index = number_of_arguments - 1;
4050 DataType::Type return_type = invoke->GetType();
4051 DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4052 DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4053
4054 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4055
4056 if (DataType::IsFloatingPointType(return_type)) {
4057 locations->SetOut(Location::RequiresFpuRegister());
4058 } else {
4059 // Take advantage of the fact that CMPXCHG writes result to RAX.
4060 locations->SetOut(Location::RegisterLocation(RAX));
4061 }
4062
4063 if (DataType::IsFloatingPointType(expected_type)) {
4064 // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4065 locations->AddTemp(Location::RegisterLocation(RAX));
4066 // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4067 locations->AddTemp(Location::RequiresRegister());
4068 } else {
4069 // Ensure that expected value is in RAX, as required by CMPXCHG.
4070 locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4071 locations->SetInAt(new_value_index, Location::RequiresRegister());
4072 if (expected_type == DataType::Type::kReference) {
4073 // Need two temporaries for MarkGCCard.
4074 locations->AddTemp(Location::RequiresRegister());
4075 locations->AddTemp(Location::RequiresRegister());
4076 if (gUseReadBarrier) {
4077 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4078 DCHECK(kUseBakerReadBarrier);
4079 locations->AddTemp(Location::RequiresRegister());
4080 }
4081 }
4082 // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4083 DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4084 }
4085 }
4086
4087 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4088 CodeGeneratorX86_64* codegen,
4089 bool is_cmpxchg,
4090 bool byte_swap = false) {
4091 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4092
4093 X86_64Assembler* assembler = codegen->GetAssembler();
4094 LocationSummary* locations = invoke->GetLocations();
4095
4096 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4097 uint32_t expected_value_index = number_of_arguments - 2;
4098 uint32_t new_value_index = number_of_arguments - 1;
4099 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4100
4101 VarHandleSlowPathX86_64* slow_path = nullptr;
4102 VarHandleTarget target = GetVarHandleTarget(invoke);
4103 if (!byte_swap) {
4104 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4105 GenerateVarHandleTarget(invoke, target, codegen);
4106 if (slow_path != nullptr) {
4107 __ Bind(slow_path->GetNativeByteOrderLabel());
4108 }
4109 }
4110
4111 uint32_t temp_count = locations->GetTempCount();
4112 GenCompareAndSetOrExchange(codegen,
4113 invoke,
4114 type,
4115 CpuRegister(target.object),
4116 CpuRegister(target.offset),
4117 /*temp1_index=*/ temp_count - 1,
4118 /*temp2_index=*/ temp_count - 2,
4119 /*temp3_index=*/ temp_count - 3,
4120 locations->InAt(new_value_index),
4121 locations->InAt(expected_value_index),
4122 locations->Out(),
4123 is_cmpxchg,
4124 byte_swap);
4125
4126 // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4127 // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4128
4129 if (slow_path != nullptr) {
4130 DCHECK(!byte_swap);
4131 __ Bind(slow_path->GetExitLabel());
4132 }
4133 }
4134
4135 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4136 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4137 }
4138
4139 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4140 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4141 }
4142
4143 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4144 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4145 }
4146
4147 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4148 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4149 }
4150
4151 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4152 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4153 }
4154
4155 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4156 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4157 }
4158
4159 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4160 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4161 }
4162
4163 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4164 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4165 }
4166
4167 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4168 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4169 }
4170
4171 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4172 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4173 }
4174
4175 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4176 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4177 }
4178
4179 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4180 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4181 }
4182
4183 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4184 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4185 }
4186
4187 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4188 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4189 }
4190
4191 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4192 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4193 }
4194
4195 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4196 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4197 }
4198
4199 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
4200 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4201 return;
4202 }
4203
4204 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4205 uint32_t new_value_index = number_of_arguments - 1;
4206 DataType::Type type = invoke->GetType();
4207 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4208
4209 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4210
4211 if (DataType::IsFloatingPointType(type)) {
4212 locations->SetOut(Location::RequiresFpuRegister());
4213 // A temporary is needed to load the new floating-point value into a register for XCHG.
4214 locations->AddTemp(Location::RequiresRegister());
4215 } else {
4216 // Use the same register for both the new value and output to take advantage of XCHG.
4217 // It does not have to be RAX, but we must pick one register so that the input and the output match.
4218 locations->SetOut(Location::RegisterLocation(RAX));
4219 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4220 if (type == DataType::Type::kReference) {
4221 // Need two temporaries for MarkGCCard.
4222 locations->AddTemp(Location::RequiresRegister());
4223 locations->AddTemp(Location::RequiresRegister());
4224 if (gUseReadBarrier) {
4225 // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4226 DCHECK(kUseBakerReadBarrier);
4227 locations->AddTemp(Location::RequiresRegister());
4228 }
4229 }
4230 }
4231 }
4232
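// Implements `getAndSet`: floating-point values go through a general-purpose temporary and XCHG,
// references use XCHG plus GC card marking (and read barrier bookkeeping when enabled), and
// integral types use XCHG followed by the required sign/zero extension.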
4233 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4234 CodeGeneratorX86_64* codegen,
4235 Location value,
4236 DataType::Type type,
4237 Address field_addr,
4238 CpuRegister ref,
4239 bool byte_swap) {
4240 X86_64Assembler* assembler = codegen->GetAssembler();
4241 LocationSummary* locations = invoke->GetLocations();
4242 Location out = locations->Out();
4243 uint32_t temp_count = locations->GetTempCount();
4244
4245 if (DataType::IsFloatingPointType(type)) {
4246 // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4247 // exchange it with the field, and move the old value into the output FP register.
4248 Location temp = locations->GetTemp(temp_count - 1);
4249 codegen->Move(temp, value);
4250 bool is64bit = (type == DataType::Type::kFloat64);
4251 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4252 if (byte_swap) {
4253 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4254 }
4255 if (is64bit) {
4256 __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4257 } else {
4258 __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4259 }
4260 if (byte_swap) {
4261 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4262 }
4263 __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
4264 } else if (type == DataType::Type::kReference) {
4265 // `getAndSet` for references: load reference and atomically exchange it with the field.
4266 // Output register is the same as the one holding new value, so no need to move the result.
4267 DCHECK(!byte_swap);
4268
4269 CpuRegister temp1 = locations->GetTemp(temp_count - 1).AsRegister<CpuRegister>();
4270 CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
4271 CpuRegister valreg = value.AsRegister<CpuRegister>();
4272
4273 if (gUseReadBarrier && kUseBakerReadBarrier) {
4274 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4275 invoke,
4276 locations->GetTemp(temp_count - 3),
4277 ref,
4278 field_addr,
4279 /*needs_null_check=*/ false,
4280 /*always_update_field=*/ true,
4281 &temp1,
4282 &temp2);
4283 }
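    // A reference is about to be stored into `ref`, so mark its GC card. The card is marked
    // unconditionally (`emit_null_check=false`), which is merely conservative if the new value
    // happens to be null.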
4284 codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false);
4285
4286 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4287 if (kPoisonHeapReferences) {
4288 // Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is
4289 // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4290 __ movl(temp1, valreg);
4291 __ PoisonHeapReference(temp1);
4292 __ xchgl(temp1, field_addr);
4293 __ UnpoisonHeapReference(temp1);
4294 __ movl(valreg, temp1);
4295 } else {
4296 __ xchgl(valreg, field_addr);
4297 }
4298 } else {
4299 // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4300 // register is the same as the one holding new value. Do sign extend / zero extend as needed.
4301 if (byte_swap) {
4302 codegen->GetInstructionCodegen()->Bswap(value, type);
4303 }
4304 CpuRegister valreg = value.AsRegister<CpuRegister>();
4305 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4306 switch (type) {
4307 case DataType::Type::kBool:
4308 case DataType::Type::kUint8:
4309 __ xchgb(valreg, field_addr);
4310 __ movzxb(valreg, valreg);
4311 break;
4312 case DataType::Type::kInt8:
4313 __ xchgb(valreg, field_addr);
4314 __ movsxb(valreg, valreg);
4315 break;
4316 case DataType::Type::kUint16:
4317 __ xchgw(valreg, field_addr);
4318 __ movzxw(valreg, valreg);
4319 break;
4320 case DataType::Type::kInt16:
4321 __ xchgw(valreg, field_addr);
4322 __ movsxw(valreg, valreg);
4323 break;
4324 case DataType::Type::kInt32:
4325 case DataType::Type::kUint32:
4326 __ xchgl(valreg, field_addr);
4327 break;
4328 case DataType::Type::kInt64:
4329 case DataType::Type::kUint64:
4330 __ xchgq(valreg, field_addr);
4331 break;
4332 default:
4333 DCHECK(false) << "unexpected type in getAndSet intrinsic";
4334 UNREACHABLE();
4335 }
4336 if (byte_swap) {
4337 codegen->GetInstructionCodegen()->Bswap(value, type);
4338 }
4339 }
4340 }
4341
4342 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
4343 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4344 return;
4345 }
4346
4347 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4348 uint32_t new_value_index = number_of_arguments - 1;
4349 DataType::Type type = invoke->GetType();
4350 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4351
4352 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4353
4354 DCHECK_NE(DataType::Type::kReference, type);
4355 DCHECK(!DataType::IsFloatingPointType(type));
4356
4357 // A temporary to compute the bitwise operation on the old and the new values.
4358 locations->AddTemp(Location::RequiresRegister());
4359 // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
4360 // instructions that accept 64-bit immediate on x86_64).
4361 locations->SetInAt(new_value_index, DataType::Is64BitType(type)
4362 ? Location::RequiresRegister()
4363 : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4364 // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4365 locations->SetOut(Location::RegisterLocation(RAX));
4366 }
4367
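// Generic get-and-update loop used for the bitwise operations (and for byte-swapped getAndAdd):
// load the old value into RAX, apply the operation into a temporary, then LOCK CMPXCHG the
// temporary into the field, retrying until no other thread has changed the field in the
// meantime. The old value ends up in RAX, which is also the output location.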
4368 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4369 CodeGeneratorX86_64* codegen,
4370 Location value,
4371 DataType::Type type,
4372 Address field_addr,
4373 GetAndUpdateOp get_and_update_op,
4374 bool byte_swap) {
4375 X86_64Assembler* assembler = codegen->GetAssembler();
4376 LocationSummary* locations = invoke->GetLocations();
4377 Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
4378 Location rax_loc = locations->Out();
4379 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4380 CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
4381 DCHECK_EQ(rax.AsRegister(), RAX);
4382 bool is64Bit = DataType::Is64BitType(type);
4383
4384 NearLabel retry;
4385 __ Bind(&retry);
4386
4387 // Load field value into RAX and copy it into a temporary register for the operation.
4388 codegen->LoadFromMemoryNoReference(type, Location::RegisterLocation(RAX), field_addr);
4389 codegen->Move(temp_loc, rax_loc);
4390 if (byte_swap) {
4391 // Byte swap the temporary, since we need to perform operation in native endianness.
4392 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4393 }
4394
4395 DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4396 int32_t const_value = value.IsConstant()
4397 ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4398 : 0;
4399
4400 // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4401 switch (get_and_update_op) {
4402 case GetAndUpdateOp::kAdd:
4403 DCHECK(byte_swap); // The non-byte-swapping path should use a faster XADD instruction.
4404 if (is64Bit) {
4405 __ addq(temp, value.AsRegister<CpuRegister>());
4406 } else if (value.IsConstant()) {
4407 __ addl(temp, Immediate(const_value));
4408 } else {
4409 __ addl(temp, value.AsRegister<CpuRegister>());
4410 }
4411 break;
4412 case GetAndUpdateOp::kBitwiseAnd:
4413 if (is64Bit) {
4414 __ andq(temp, value.AsRegister<CpuRegister>());
4415 } else if (value.IsConstant()) {
4416 __ andl(temp, Immediate(const_value));
4417 } else {
4418 __ andl(temp, value.AsRegister<CpuRegister>());
4419 }
4420 break;
4421 case GetAndUpdateOp::kBitwiseOr:
4422 if (is64Bit) {
4423 __ orq(temp, value.AsRegister<CpuRegister>());
4424 } else if (value.IsConstant()) {
4425 __ orl(temp, Immediate(const_value));
4426 } else {
4427 __ orl(temp, value.AsRegister<CpuRegister>());
4428 }
4429 break;
4430 case GetAndUpdateOp::kBitwiseXor:
4431 if (is64Bit) {
4432 __ xorq(temp, value.AsRegister<CpuRegister>());
4433 } else if (value.IsConstant()) {
4434 __ xorl(temp, Immediate(const_value));
4435 } else {
4436 __ xorl(temp, value.AsRegister<CpuRegister>());
4437 }
4438 break;
4439 default:
4440 DCHECK(false) << "unexpected operation";
4441 UNREACHABLE();
4442 }
4443
4444 if (byte_swap) {
4445 // RAX still contains the original value, but we need to byte swap the temporary back.
4446 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4447 }
4448
4449 switch (type) {
4450 case DataType::Type::kBool:
4451 case DataType::Type::kUint8:
4452 case DataType::Type::kInt8:
4453 __ LockCmpxchgb(field_addr, temp);
4454 break;
4455 case DataType::Type::kUint16:
4456 case DataType::Type::kInt16:
4457 __ LockCmpxchgw(field_addr, temp);
4458 break;
4459 case DataType::Type::kInt32:
4460 case DataType::Type::kUint32:
4461 __ LockCmpxchgl(field_addr, temp);
4462 break;
4463 case DataType::Type::kInt64:
4464 case DataType::Type::kUint64:
4465 __ LockCmpxchgq(field_addr, temp);
4466 break;
4467 default:
4468 DCHECK(false) << "unexpected type in getAndBitwiseOp intrinsic";
4469 UNREACHABLE();
4470 }
4471
4472 __ j(kNotZero, &retry);
4473
4474 // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
4475 // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
4476 if (byte_swap) {
4477 codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
4478 }
4479 }
4480
4481 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4482 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4483 return;
4484 }
4485
4486 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4487 uint32_t new_value_index = number_of_arguments - 1;
4488 DataType::Type type = invoke->GetType();
4489 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4490
4491 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4492
4493 if (DataType::IsFloatingPointType(type)) {
4494 locations->SetOut(Location::RequiresFpuRegister());
4495 // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
4496 locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
4497 // CMPXCHG clobbers RAX.
4498 locations->AddTemp(Location::RegisterLocation(RAX));
4499 // An FP temporary to load the old value from the field and perform FP addition.
4500 locations->AddTemp(Location::RequiresFpuRegister());
4501 // A temporary to hold the new value for CMPXCHG.
4502 locations->AddTemp(Location::RequiresRegister());
4503 } else {
4504 DCHECK_NE(type, DataType::Type::kReference);
4505 // Use the same register for both the new value and output to take advantage of XADD.
4506 // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
4507 // back to GenerateVarHandleGetAndOp that expects out in RAX.
4508 locations->SetOut(Location::RegisterLocation(RAX));
4509 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4510 if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
4511 // For byte array views with non-native endianness we need extra BSWAP operations, so we
4512 // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In
4513 // that case we need two temporary registers: one to hold the value instead of RAX (which
4514 // may get clobbered by repeated CMPXCHG) and one for performing the operation. At compile
4515 // time we cannot distinguish this case from arrays or native-endian byte array views.
4516 locations->AddTemp(Location::RequiresRegister());
4517 locations->AddTemp(Location::RequiresRegister());
4518 }
4519 }
4520 }
4521
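// Implements `getAndAdd`: floating-point values use a LOCK CMPXCHG retry loop around ADDSS/ADDSD,
// integral values in native byte order use LOCK XADD, and byte-swapped integral accesses fall
// back to the generic CMPXCHG loop in GenerateVarHandleGetAndOp().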
4522 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
4523 CodeGeneratorX86_64* codegen,
4524 Location value,
4525 DataType::Type type,
4526 Address field_addr,
4527 bool byte_swap) {
4528 X86_64Assembler* assembler = codegen->GetAssembler();
4529 LocationSummary* locations = invoke->GetLocations();
4530 Location out = locations->Out();
4531 uint32_t temp_count = locations->GetTempCount();
4532
4533 if (DataType::IsFloatingPointType(type)) {
4534 if (byte_swap) {
4535 // This code should never be executed: it is the case of a byte array view (since it requires
4536 // a byte swap), and varhandles for byte array views support numeric atomic update access modes
4537 // only for int and long, but not for floating-point types (see the javadoc comments for
4538 // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART varhandle
4539 // implementation for byte array views treats floating-point types as numeric types in
4540 // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
4541 // fails the access mode check at runtime prior to reaching this point. The illegal instruction
4542 // UD2 ensures that if control flow gets here by mistake, we will notice.
4543 __ ud2();
4544 }
4545
4546 // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register
4547 // and into RAX for CMPXCHG, add the new FP value to the old one, move the sum to a non-FP
4548 // temporary and loop until CMPXCHG succeeds. Finally, move the old value from RAX to the output FP register.
4549 bool is64bit = (type == DataType::Type::kFloat64);
4550 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4551 XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
4552 Location rax_loc = Location::RegisterLocation(RAX);
4553 Location temp_loc = locations->GetTemp(temp_count - 1);
4554 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4555
4556 NearLabel retry;
4557 __ Bind(&retry);
4558
4559 // Read the value from memory into an FP register and copy it into RAX.
4560 if (is64bit) {
4561 __ movsd(fptemp, field_addr);
4562 } else {
4563 __ movss(fptemp, field_addr);
4564 }
4565 __ movd(CpuRegister(RAX), fptemp, is64bit);
4566 // If necessary, byte swap RAX and update the value in FP register to also be byte-swapped.
4567 if (byte_swap) {
4568 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4569 __ movd(fptemp, CpuRegister(RAX), is64bit);
4570 }
4571 // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
4572 if (is64bit) {
4573 __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
4574 } else {
4575 __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
4576 }
4577 __ movd(temp, fptemp, is64bit);
4578 // If necessary, byte swap the new value in the temporary and the expected value in RAX back to memory byte order before CMPXCHG.
4579 if (byte_swap) {
4580 codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
4581 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4582 }
4583 if (is64bit) {
4584 __ LockCmpxchgq(field_addr, temp);
4585 } else {
4586 __ LockCmpxchgl(field_addr, temp);
4587 }
4588
4589 __ j(kNotZero, &retry);
4590
4591 // The old value is in RAX, byte swap if necessary.
4592 if (byte_swap) {
4593 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
4594 }
4595 __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
4596 } else {
4597 if (byte_swap) {
4598 // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
4599 // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
4600 // implementation that is also used for bitwise operations.
4601 // Move value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
4602 DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
4603 Location temp = locations->GetTemp(temp_count - 2);
4604 codegen->Move(temp, value);
4605 GenerateVarHandleGetAndOp(
4606 invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
4607 } else {
4608 // `getAndAdd` for integral types: LOCK XADD atomically adds the new value to the field and
4609 // places the old field value in the register, which is also the output register. Do
4610 // sign extend / zero extend as needed.
4611 CpuRegister valreg = value.AsRegister<CpuRegister>();
4612 DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
4613 switch (type) {
4614 case DataType::Type::kBool:
4615 case DataType::Type::kUint8:
4616 __ LockXaddb(field_addr, valreg);
4617 __ movzxb(valreg, valreg);
4618 break;
4619 case DataType::Type::kInt8:
4620 __ LockXaddb(field_addr, valreg);
4621 __ movsxb(valreg, valreg);
4622 break;
4623 case DataType::Type::kUint16:
4624 __ LockXaddw(field_addr, valreg);
4625 __ movzxw(valreg, valreg);
4626 break;
4627 case DataType::Type::kInt16:
4628 __ LockXaddw(field_addr, valreg);
4629 __ movsxw(valreg, valreg);
4630 break;
4631 case DataType::Type::kInt32:
4632 case DataType::Type::kUint32:
4633 __ LockXaddl(field_addr, valreg);
4634 break;
4635 case DataType::Type::kInt64:
4636 case DataType::Type::kUint64:
4637 __ LockXaddq(field_addr, valreg);
4638 break;
4639 default:
4640 DCHECK(false) << "unexpected type in getAndAdd intrinsic";
4641 UNREACHABLE();
4642 }
4643 }
4644 }
4645 }
4646
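// Common driver for all get-and-update access modes: performs the VarHandle checks, resolves
// the target, emits the requested kAnyStore / kAnyAny barriers around the update, and dispatches
// to the kSet / kAdd / bitwise helpers above.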
4647 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
4648 CodeGeneratorX86_64* codegen,
4649 GetAndUpdateOp get_and_update_op,
4650 bool need_any_store_barrier,
4651 bool need_any_any_barrier,
4652 bool byte_swap = false) {
4653 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4654
4655 X86_64Assembler* assembler = codegen->GetAssembler();
4656 LocationSummary* locations = invoke->GetLocations();
4657
4658 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4659 Location value = locations->InAt(number_of_arguments - 1);
4660 DataType::Type type = invoke->GetType();
4661
4662 VarHandleSlowPathX86_64* slow_path = nullptr;
4663 VarHandleTarget target = GetVarHandleTarget(invoke);
4664 if (!byte_swap) {
4665 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4666 GenerateVarHandleTarget(invoke, target, codegen);
4667 if (slow_path != nullptr) {
4668 slow_path->SetGetAndUpdateOp(get_and_update_op);
4669 slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
4670 slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
4671 __ Bind(slow_path->GetNativeByteOrderLabel());
4672 }
4673 }
4674
4675 CpuRegister ref(target.object);
4676 Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
4677
4678 if (need_any_store_barrier) {
4679 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4680 }
4681
4682 switch (get_and_update_op) {
4683 case GetAndUpdateOp::kSet:
4684 GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
4685 break;
4686 case GetAndUpdateOp::kAdd:
4687 GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
4688 break;
4689 case GetAndUpdateOp::kBitwiseAnd:
4690 case GetAndUpdateOp::kBitwiseOr:
4691 case GetAndUpdateOp::kBitwiseXor:
4692 GenerateVarHandleGetAndOp(
4693 invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
4694 break;
4695 }
4696
4697 if (need_any_any_barrier) {
4698 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4699 }
4700
4701 if (slow_path != nullptr) {
4702 DCHECK(!byte_swap);
4703 __ Bind(slow_path->GetExitLabel());
4704 }
4705 }
4706
4707 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4708 CreateVarHandleGetAndSetLocations(invoke);
4709 }
4710
4711 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
4712 // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4713 GenerateVarHandleGetAndUpdate(invoke,
4714 codegen_,
4715 GetAndUpdateOp::kSet,
4716 /*need_any_store_barrier=*/ true,
4717 /*need_any_any_barrier=*/ true);
4718 }
4719
4720 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4721 CreateVarHandleGetAndSetLocations(invoke);
4722 }
4723
4724 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4725 // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4726 GenerateVarHandleGetAndUpdate(invoke,
4727 codegen_,
4728 GetAndUpdateOp::kSet,
4729 /*need_any_store_barrier=*/ false,
4730 /*need_any_any_barrier=*/ false);
4731 }
4732
4733 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4734 CreateVarHandleGetAndSetLocations(invoke);
4735 }
4736
4737 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4738 // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4739 GenerateVarHandleGetAndUpdate(invoke,
4740 codegen_,
4741 GetAndUpdateOp::kSet,
4742 /*need_any_store_barrier=*/ true,
4743 /*need_any_any_barrier=*/ false);
4744 }
4745
4746 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4747 CreateVarHandleGetAndAddLocations(invoke);
4748 }
4749
4750 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4751 // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4752 GenerateVarHandleGetAndUpdate(invoke,
4753 codegen_,
4754 GetAndUpdateOp::kAdd,
4755 /*need_any_store_barrier=*/ true,
4756 /*need_any_any_barrier=*/ true);
4757 }
4758
4759 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4760 CreateVarHandleGetAndAddLocations(invoke);
4761 }
4762
4763 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4764 // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4765 GenerateVarHandleGetAndUpdate(invoke,
4766 codegen_,
4767 GetAndUpdateOp::kAdd,
4768 /*need_any_store_barrier=*/ false,
4769 /*need_any_any_barrier=*/ false);
4770 }
4771
4772 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4773 CreateVarHandleGetAndAddLocations(invoke);
4774 }
4775
4776 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4777 // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4778 GenerateVarHandleGetAndUpdate(invoke,
4779 codegen_,
4780 GetAndUpdateOp::kAdd,
4781 /*need_any_store_barrier=*/ true,
4782 /*need_any_any_barrier=*/ false);
4783 }
4784
4785 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4786 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4787 }
4788
4789 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4790 // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4791 GenerateVarHandleGetAndUpdate(invoke,
4792 codegen_,
4793 GetAndUpdateOp::kBitwiseAnd,
4794 /*need_any_store_barrier=*/ true,
4795 /*need_any_any_barrier=*/ true);
4796 }
4797
4798 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4799 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4800 }
4801
4802 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4803 // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4804 GenerateVarHandleGetAndUpdate(invoke,
4805 codegen_,
4806 GetAndUpdateOp::kBitwiseAnd,
4807 /*need_any_store_barrier=*/ false,
4808 /*need_any_any_barrier=*/ false);
4809 }
4810
4811 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4812 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4813 }
4814
4815 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4816 // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4817 GenerateVarHandleGetAndUpdate(invoke,
4818 codegen_,
4819 GetAndUpdateOp::kBitwiseAnd,
4820 /*need_any_store_barrier=*/ true,
4821 /*need_any_any_barrier=*/ false);
4822 }
4823
4824 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4825 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4826 }
4827
4828 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4829 // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4830 GenerateVarHandleGetAndUpdate(invoke,
4831 codegen_,
4832 GetAndUpdateOp::kBitwiseOr,
4833 /*need_any_store_barrier=*/ true,
4834 /*need_any_any_barrier=*/ true);
4835 }
4836
4837 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4838 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4839 }
4840
4841 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4842 // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4843 GenerateVarHandleGetAndUpdate(invoke,
4844 codegen_,
4845 GetAndUpdateOp::kBitwiseOr,
4846 /*need_any_store_barrier=*/ false,
4847 /*need_any_any_barrier=*/ false);
4848 }
4849
4850 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4851 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4852 }
4853
4854 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4855 // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4856 GenerateVarHandleGetAndUpdate(invoke,
4857 codegen_,
4858 GetAndUpdateOp::kBitwiseOr,
4859 /*need_any_store_barrier=*/ true,
4860 /*need_any_any_barrier=*/ false);
4861 }
4862
4863 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4864 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4865 }
4866
4867 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4868 // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
4869 GenerateVarHandleGetAndUpdate(invoke,
4870 codegen_,
4871 GetAndUpdateOp::kBitwiseXor,
4872 /*need_any_store_barrier=*/ true,
4873 /*need_any_any_barrier=*/ true);
4874 }
4875
4876 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4877 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4878 }
4879
4880 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4881 // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
4882 GenerateVarHandleGetAndUpdate(invoke,
4883 codegen_,
4884 GetAndUpdateOp::kBitwiseXor,
4885 /*need_any_store_barrier=*/ false,
4886 /*need_any_any_barrier=*/ false);
4887 }
4888
4889 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4890 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4891 }
4892
4893 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4894 // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
4895 GenerateVarHandleGetAndUpdate(invoke,
4896 codegen_,
4897 GetAndUpdateOp::kBitwiseXor,
4898 /*need_any_store_barrier=*/ true,
4899 /*need_any_any_barrier=*/ false);
4900 }
4901
4902 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
4903 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
4904 X86_64Assembler* assembler = codegen->GetAssembler();
4905
4906 HInvoke* invoke = GetInvoke();
4907 LocationSummary* locations = invoke->GetLocations();
4908 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
4909 DataType::Type value_type =
4910 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4911 DCHECK_NE(value_type, DataType::Type::kReference);
4912 size_t size = DataType::Size(value_type);
4913 DCHECK_GT(size, 1u);
4914
4915 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4916 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
4917 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4918 CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
4919
4920 MemberOffset class_offset = mirror::Object::ClassOffset();
4921 MemberOffset array_length_offset = mirror::Array::LengthOffset();
4922 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
4923 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
4924
4925 VarHandleTarget target = GetVarHandleTarget(invoke);
4926
4927 __ Bind(GetByteArrayViewCheckLabel());
4928
4929 // The main path checked that the coordinateType0 is an array class that matches
4930 // the class of the actual coordinate argument, but its component type does not match the value type.
4931 // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
4932 codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
4933 assembler->MaybePoisonHeapReference(temp);
4934 __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
4935 __ j(kNotEqual, GetEntryLabel());
4936
4937 // Check for array index out of bounds.
4938 __ movl(temp, Address(object, array_length_offset.Int32Value()));
4939 // SUB sets flags in the same way as CMP.
4940 __ subl(temp, index);
4941 __ j(kBelowEqual, GetEntryLabel());
4942 // The difference between the array length and the index must be at least as large as the `value_type` size.
4943 __ cmpl(temp, Immediate(size));
4944 __ j(kBelow, GetEntryLabel());
4945
4946 // Construct the target.
4947 __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
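  // `target.object` is already the byte array itself (input coordinate 1), so only the offset
  // within the array needs to be recomputed here with a byte (TIMES_1) scale.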
4948
4949 // Alignment check. For unaligned access, go to the runtime.
4950 DCHECK(IsPowerOfTwo(size));
4951 __ testl(CpuRegister(target.offset), Immediate(size - 1u));
4952 __ j(kNotZero, GetEntryLabel());
4953
4954 // Byte order check. For native byte order return to the main path.
4955 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
4956 IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
4957 // There is no reason to differentiate between native byte order and byte-swap
4958 // for setting a zero bit pattern. Just return to the main path.
4959 __ jmp(GetNativeByteOrderLabel());
4960 return;
4961 }
4962 __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
4963 __ j(kNotEqual, GetNativeByteOrderLabel());
4964
4965 switch (access_mode_template) {
4966 case mirror::VarHandle::AccessModeTemplate::kGet:
4967 GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
4968 break;
4969 case mirror::VarHandle::AccessModeTemplate::kSet:
4970 GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
4971 break;
4972 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
4973 GenerateVarHandleCompareAndSetOrExchange(
4974 invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
4975 break;
4976 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
4977 GenerateVarHandleCompareAndSetOrExchange(
4978 invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
4979 break;
4980 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
4981 GenerateVarHandleGetAndUpdate(invoke,
4982 codegen,
4983 get_and_update_op_,
4984 need_any_store_barrier_,
4985 need_any_any_barrier_,
4986 /*byte_swap=*/ true);
4987 break;
4988 }
4989
4990 __ jmp(GetExitLabel());
4991 }
4992
4993 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
4994 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
4995 #undef MARK_UNIMPLEMENTED
4996
4997 UNREACHABLE_INTRINSICS(X86_64)
4998
4999 #undef __
5000
5001 } // namespace x86_64
5002 } // namespace art
5003