/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86_64 slow path. Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
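      // bswapl reverses all four bytes, leaving the original low 16 bits in the upper half of
      // the register; the arithmetic shift moves them back down and sign-extends, as required
      // for a Java short.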
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

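  // Branch-free abs: mask = x >> (bits - 1) is 0 for non-negative x and all ones for negative
  // x, so (x + mask) ^ mask yields |x| (with the most negative value mapping to itself, as
  // Java requires).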
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if Nan jmp Nan_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // Nan_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
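  // Reaching here means the operands compared equal but live in different registers; the only
  // interesting case is +0.0 vs. -0.0. OR-ing the bit patterns keeps a set sign bit and gives
  // -0.0 for min, while AND-ing clears it and gives +0.0 for max.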
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  // (out := op1)
  // out <=? op2
  // if out is min jmp done
  // out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

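  // For min, overwrite out with op2 when out > op2; for max, when out < op2. Otherwise out
  // already holds op1, which is the correct result.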
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
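    // The immediate selects the SSE4.1 rounding mode: 0 = round to nearest even (rint),
    // 1 = round toward negative infinity (floor), 2 = round toward positive infinity (ceil).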
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations =
        new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integral value.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);
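  // If we jumped, out still holds kPrimIntMax, which is exactly the saturated result required
  // for inputs at or above it.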

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integral value.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);
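  // If we jumped, out still holds kPrimLongMax, which is exactly the saturated result required
  // for inputs at or above it.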

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86_64Assembler* assembler = codegen->GetAssembler();

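  // The entrypoint address is read from the current Thread, which x86-64 ART addresses through
  // the GS segment register; the recorded PC info maps this call site back to its dex pc.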
  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls. This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  // the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  // we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be > 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister input_len,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
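  // REP MOVSW copies RCX 16-bit units from [RSI] to [RDI], advancing both pointers as it goes.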
1118 __ rep_movsw();
1119
1120 __ Bind(slow_path->GetExitLabel());
1121 }
1122
1123
VisitSystemArrayCopy(HInvoke * invoke)1124 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1125 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1126 }
1127
1128 // TODO: Implement read barriers in the SystemArrayCopy intrinsic.
1129 // Note that this code path is not used (yet) because we do not
1130 // intrinsify methods that can go into the IntrinsicSlowPathX86_64
1131 // slow path.
VisitSystemArrayCopy(HInvoke * invoke)1132 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
1133 X86_64Assembler* assembler = GetAssembler();
1134 LocationSummary* locations = invoke->GetLocations();
1135
1136 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1137 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1138 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1139 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1140
1141 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
1142 Location src_pos = locations->InAt(1);
1143 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
1144 Location dest_pos = locations->InAt(3);
1145 Location length = locations->InAt(4);
1146 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
1147 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
1148 CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
1149
1150 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1151 codegen_->AddSlowPath(slow_path);
1152
1153 NearLabel conditions_on_positions_validated;
1154 SystemArrayCopyOptimizations optimizations(invoke);
1155
1156 // If source and destination are the same, we go to slow path if we need to do
1157 // forward copying.
1158 if (src_pos.IsConstant()) {
1159 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1160 if (dest_pos.IsConstant()) {
1161 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1162 if (optimizations.GetDestinationIsSource()) {
1163 // Checked when building locations.
1164 DCHECK_GE(src_pos_constant, dest_pos_constant);
1165 } else if (src_pos_constant < dest_pos_constant) {
1166 __ cmpl(src, dest);
1167 __ j(kEqual, slow_path->GetEntryLabel());
1168 }
1169 } else {
1170 if (!optimizations.GetDestinationIsSource()) {
1171 __ cmpl(src, dest);
1172 __ j(kNotEqual, &conditions_on_positions_validated);
1173 }
1174 __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
1175 __ j(kGreater, slow_path->GetEntryLabel());
1176 }
1177 } else {
1178 if (!optimizations.GetDestinationIsSource()) {
1179 __ cmpl(src, dest);
1180 __ j(kNotEqual, &conditions_on_positions_validated);
1181 }
1182 if (dest_pos.IsConstant()) {
1183 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1184 __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
1185 __ j(kLess, slow_path->GetEntryLabel());
1186 } else {
1187 __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
1188 __ j(kLess, slow_path->GetEntryLabel());
1189 }
1190 }
1191
1192 __ Bind(&conditions_on_positions_validated);
1193
1194 if (!optimizations.GetSourceIsNotNull()) {
1195 // Bail out if the source is null.
1196 __ testl(src, src);
1197 __ j(kEqual, slow_path->GetEntryLabel());
1198 }
1199
1200 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1201 // Bail out if the destination is null.
1202 __ testl(dest, dest);
1203 __ j(kEqual, slow_path->GetEntryLabel());
1204 }
1205
1206 // If the length is negative, bail out.
1207 // We have already checked in the LocationsBuilder for the constant case.
1208 if (!length.IsConstant() &&
1209 !optimizations.GetCountIsSourceLength() &&
1210 !optimizations.GetCountIsDestinationLength()) {
1211 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1212 __ j(kLess, slow_path->GetEntryLabel());
1213 }
1214
1215 // Validity checks: source.
1216 CheckPosition(assembler,
1217 src_pos,
1218 src,
1219 length,
1220 slow_path,
1221 temp1,
1222 temp2,
1223 optimizations.GetCountIsSourceLength());
1224
1225 // Validity checks: dest.
1226 CheckPosition(assembler,
1227 dest_pos,
1228 dest,
1229 length,
1230 slow_path,
1231 temp1,
1232 temp2,
1233 optimizations.GetCountIsDestinationLength());
1234
1235 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1236 // Check whether all elements of the source array are assignable to the component
1237 // type of the destination array. We do two checks: the classes are the same,
1238 // or the destination is Object[]. If none of these checks succeed, we go to the
1239 // slow path.
1240 __ movl(temp1, Address(dest, class_offset));
1241 __ movl(temp2, Address(src, class_offset));
1242 bool did_unpoison = false;
1243 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1244 !optimizations.GetSourceIsNonPrimitiveArray()) {
1245 // One or two of the references need to be unpoisoned. Unpoison them
1246 // both to make the identity check valid.
1247 __ MaybeUnpoisonHeapReference(temp1);
1248 __ MaybeUnpoisonHeapReference(temp2);
1249 did_unpoison = true;
1250 }
1251
1252 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1253 // Bail out if the destination is not a non primitive array.
1254 // /* HeapReference<Class> */ TMP = temp1->component_type_
1255 __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1256 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1257 __ j(kEqual, slow_path->GetEntryLabel());
1258 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1259 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1260 __ j(kNotEqual, slow_path->GetEntryLabel());
1261 }
1262
1263 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1264 // Bail out if the source is not a non primitive array.
1265 // /* HeapReference<Class> */ TMP = temp2->component_type_
1266 __ movl(CpuRegister(TMP), Address(temp2, component_offset));
1267 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1268 __ j(kEqual, slow_path->GetEntryLabel());
1269 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1270 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1271 __ j(kNotEqual, slow_path->GetEntryLabel());
1272 }
1273
1274 __ cmpl(temp1, temp2);
1275
1276 if (optimizations.GetDestinationIsTypedObjectArray()) {
1277 NearLabel do_copy;
1278 __ j(kEqual, &do_copy);
1279 if (!did_unpoison) {
1280 __ MaybeUnpoisonHeapReference(temp1);
1281 }
1282 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1283 __ movl(temp1, Address(temp1, component_offset));
1284 __ MaybeUnpoisonHeapReference(temp1);
1285 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1286 __ movl(temp1, Address(temp1, super_offset));
1287 // No need to unpoison the result, we're comparing against null.
1288 __ testl(temp1, temp1);
1289 __ j(kNotEqual, slow_path->GetEntryLabel());
1290 __ Bind(&do_copy);
1291 } else {
1292 __ j(kNotEqual, slow_path->GetEntryLabel());
1293 }
1294 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1295 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1296 // Bail out if the source is not a non primitive array.
1297 // /* HeapReference<Class> */ temp1 = src->klass_
1298 __ movl(temp1, Address(src, class_offset));
1299 __ MaybeUnpoisonHeapReference(temp1);
1300 // /* HeapReference<Class> */ TMP = temp1->component_type_
1301 __ movl(CpuRegister(TMP), Address(temp1, component_offset));
1302 __ testl(CpuRegister(TMP), CpuRegister(TMP));
1303 __ j(kEqual, slow_path->GetEntryLabel());
1304 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
1305 __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
1306 __ j(kNotEqual, slow_path->GetEntryLabel());
1307 }
1308
1309 // Compute base source address, base destination address, and end source address.
1310
1311 uint32_t element_size = sizeof(int32_t);
1312 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
1313 if (src_pos.IsConstant()) {
1314 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
1315 __ leal(temp1, Address(src, element_size * constant + offset));
1316 } else {
1317 __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1318 }
1319
1320 if (dest_pos.IsConstant()) {
1321 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
1322 __ leal(temp2, Address(dest, element_size * constant + offset));
1323 } else {
1324 __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
1325 }
1326
1327 if (length.IsConstant()) {
1328 int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
1329 __ leal(temp3, Address(temp1, element_size * constant));
1330 } else {
1331 __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
1332 }
1333
1334 // Iterate over the arrays and do a raw copy of the objects. We don't need to
1335 // poison/unpoison, nor do any read barrier as the next uses of the destination
1336 // array will do it.
1337 NearLabel loop, done;
1338 __ cmpl(temp1, temp3);
1339 __ j(kEqual, &done);
1340 __ Bind(&loop);
1341 __ movl(CpuRegister(TMP), Address(temp1, 0));
1342 __ movl(Address(temp2, 0), CpuRegister(TMP));
1343 __ addl(temp1, Immediate(element_size));
1344 __ addl(temp2, Immediate(element_size));
1345 __ cmpl(temp1, temp3);
1346 __ j(kNotEqual, &loop);
1347 __ Bind(&done);
1348
1349 // We only need one card marking on the destination array.
1350 codegen_->MarkGCCard(temp1,
1351 temp2,
1352 dest,
1353 CpuRegister(kNoRegister),
1354 /* value_can_be_null */ false);
1355
1356 __ Bind(slow_path->GetExitLabel());
1357 }
1358
VisitStringCompareTo(HInvoke * invoke)1359 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1360 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1361 LocationSummary::kCall,
1362 kIntrinsified);
1363 InvokeRuntimeCallingConvention calling_convention;
1364 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1365 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1366 locations->SetOut(Location::RegisterLocation(RAX));
1367 }
1368
VisitStringCompareTo(HInvoke * invoke)1369 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1370 X86_64Assembler* assembler = GetAssembler();
1371 LocationSummary* locations = invoke->GetLocations();
1372
1373 // Note that the null check must have been done earlier.
1374 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1375
1376 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1377 __ testl(argument, argument);
1378 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1379 codegen_->AddSlowPath(slow_path);
1380 __ j(kEqual, slow_path->GetEntryLabel());
1381
1382 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
1383 /* no_rip */ true));
1384 __ Bind(slow_path->GetExitLabel());
1385 }
1386
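// A rough Java-level sketch of the fast path generated by the code below (illustrative
// only; memcmp4 and roundUp are hypothetical helpers, not ART APIs):
//
//   if (arg == null) return false;
//   if (arg.getClass() != this.getClass()) return false;  // only String can match
//   if (arg == this) return true;
//   if (this.count != arg.count) return false;
//   if (this.count == 0) return true;
//   return memcmp4(this.value, arg.value, roundUp(count, 4) / 4) == 0;  // 4 chars at a time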
VisitStringEquals(HInvoke * invoke)1387 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1388 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1389 LocationSummary::kNoCall,
1390 kIntrinsified);
1391 locations->SetInAt(0, Location::RequiresRegister());
1392 locations->SetInAt(1, Location::RequiresRegister());
1393
1394 // Request temporary registers: RCX and RDI are needed for the repe_cmpsq instruction.
1395 locations->AddTemp(Location::RegisterLocation(RCX));
1396 locations->AddTemp(Location::RegisterLocation(RDI));
1397
1398 // Set the output register: RSI is needed for the repe_cmpsq instruction anyway.
1399 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1400 }
1401
VisitStringEquals(HInvoke * invoke)1402 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1403 X86_64Assembler* assembler = GetAssembler();
1404 LocationSummary* locations = invoke->GetLocations();
1405
1406 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1407 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1408 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1409 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1410 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1411
1412 NearLabel end, return_true, return_false;
1413
1414 // Get offsets of count, value, and class fields within a string object.
1415 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1416 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1417 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1418
1419 // Note that the null check must have been done earlier.
1420 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1421
1422 // Check if input is null, return false if it is.
1423 __ testl(arg, arg);
1424 __ j(kEqual, &return_false);
1425
1426 // Instanceof check for the argument by comparing class fields.
1427 // All string objects must have the same type since String cannot be subclassed.
1428 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1429 // If the argument is a string object, its class field must be equal to receiver's class field.
1430 __ movl(rcx, Address(str, class_offset));
1431 __ cmpl(rcx, Address(arg, class_offset));
1432 __ j(kNotEqual, &return_false);
1433
1434 // Reference equality check, return true if same reference.
1435 __ cmpl(str, arg);
1436 __ j(kEqual, &return_true);
1437
1438 // Load length of receiver string.
1439 __ movl(rcx, Address(str, count_offset));
1440 // Check if lengths are equal, return false if they're not.
1441 __ cmpl(rcx, Address(arg, count_offset));
1442 __ j(kNotEqual, &return_false);
1443 // Return true if both strings are empty.
1444 __ jrcxz(&return_true);
1445
1446 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1447 __ leal(rsi, Address(str, value_offset));
1448 __ leal(rdi, Address(arg, value_offset));
1449
1450 // Divide the string length by 4, rounding up so that lengths not divisible by 4 are covered.
1451 __ addl(rcx, Immediate(3));
1452 __ shrl(rcx, Immediate(2));
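// Illustrative example (not part of the original code): a length of 5 chars becomes
// (5 + 3) >> 2 = 2 quad-word comparisons, so the last compare also reads past the final
// char; the assertions below are there to guarantee that this over-read is safe.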
1453
1454 // Assertions that must hold in order to compare strings 4 characters at a time.
1455 DCHECK_ALIGNED(value_offset, 8);
1456 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1457
1458 // Loop to compare strings four characters at a time starting at the beginning of the string.
1459 __ repe_cmpsq();
1460 // If strings are not equal, zero flag will be cleared.
1461 __ j(kNotEqual, &return_false);
1462
1463 // Return true and exit the function.
1464 // If loop does not result in returning false, we return true.
1465 __ Bind(&return_true);
1466 __ movl(rsi, Immediate(1));
1467 __ jmp(&end);
1468
1469 // Return false and exit the function.
1470 __ Bind(&return_false);
1471 __ xorl(rsi, rsi);
1472 __ Bind(&end);
1473 }
1474
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1475 static void CreateStringIndexOfLocations(HInvoke* invoke,
1476 ArenaAllocator* allocator,
1477 bool start_at_zero) {
1478 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1479 LocationSummary::kCallOnSlowPath,
1480 kIntrinsified);
1481 // The data needs to be in RDI for scasw, so request that the string be placed there anyway.
1482 locations->SetInAt(0, Location::RegisterLocation(RDI));
1483 // Even if we are looking for a constant char, it still has to be copied into RAX, so simply
1484 // request that the allocator place it there anyway. The constant check can still be done by
1485 // inspecting the instruction's parameter explicitly.
1486 // Note: This works because RAX is not clobbered anywhere else.
1487 locations->SetInAt(1, Location::RegisterLocation(RAX));
1488 if (!start_at_zero) {
1489 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1490 }
1491 // As we clobber RDI during execution anyways, also use it as the output.
1492 locations->SetOut(Location::SameAsFirstInput());
1493
1494 // repne scasw uses RCX as the counter.
1495 locations->AddTemp(Location::RegisterLocation(RCX));
1496 // Need another temporary to be able to compute the result.
1497 locations->AddTemp(Location::RequiresRegister());
1498 }
1499
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,ArenaAllocator * allocator,bool start_at_zero)1500 static void GenerateStringIndexOf(HInvoke* invoke,
1501 X86_64Assembler* assembler,
1502 CodeGeneratorX86_64* codegen,
1503 ArenaAllocator* allocator,
1504 bool start_at_zero) {
1505 LocationSummary* locations = invoke->GetLocations();
1506
1507 // Note that the null check must have been done earlier.
1508 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1509
1510 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1511 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1512 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1513 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1514 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1515
1516 // Check our assumptions for registers.
1517 DCHECK_EQ(string_obj.AsRegister(), RDI);
1518 DCHECK_EQ(search_value.AsRegister(), RAX);
1519 DCHECK_EQ(counter.AsRegister(), RCX);
1520 DCHECK_EQ(out.AsRegister(), RDI);
1521
1522 // Check for code points > 0xFFFF: either emit a slow-path check when the value is not known
1523 // statically, or dispatch directly to the slow path if we have an out-of-range constant.
1524 SlowPathCode* slow_path = nullptr;
1525 if (invoke->InputAt(1)->IsIntConstant()) {
1526 if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
1527 std::numeric_limits<uint16_t>::max()) {
1528 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1529 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1530 slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1531 codegen->AddSlowPath(slow_path);
1532 __ jmp(slow_path->GetEntryLabel());
1533 __ Bind(slow_path->GetExitLabel());
1534 return;
1535 }
1536 } else {
1537 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1538 slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
1539 codegen->AddSlowPath(slow_path);
1540 __ j(kAbove, slow_path->GetEntryLabel());
1541 }
1542
1543 // From here down, we know that we are looking for a char that fits in 16 bits.
1544 // Location of reference to data array within the String object.
1545 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1546 // Location of count within the String object.
1547 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1548
1549 // Load string length, i.e., the count field of the string.
1550 __ movl(string_length, Address(string_obj, count_offset));
1551
1552 // Do a length check.
1553 // TODO: Support jecxz.
1554 NearLabel not_found_label;
1555 __ testl(string_length, string_length);
1556 __ j(kEqual, &not_found_label);
1557
1558 if (start_at_zero) {
1559 // Number of chars to scan is the same as the string length.
1560 __ movl(counter, string_length);
1561
1562 // Move to the start of the string.
1563 __ addq(string_obj, Immediate(value_offset));
1564 } else {
1565 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1566
1567 // Do a start_index check.
1568 __ cmpl(start_index, string_length);
1569 __ j(kGreaterEqual, &not_found_label);
1570
1571 // Ensure we have a start index >= 0.
1572 __ xorl(counter, counter);
1573 __ cmpl(start_index, Immediate(0));
1574 __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
1575
1576 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1577 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1578
1579 // Now update RCX, the work counter: it will be string.length - start_index.
1580 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1581 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1582 }
1583
1584 // Everything is set up for repne scasw:
1585 // * Comparison address (the string data to scan) in RDI, counter in RCX.
1586 // * The 16-bit char to search for in AX.
1587 __ repne_scasw();
1588
1589 // Did we find a match?
1590 __ j(kNotEqual, &not_found_label);
1591
1592 // Yes, we matched. Compute the index of the result.
1593 __ subl(string_length, counter);
1594 __ leal(out, Address(string_length, -1));
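// Illustrative trace (hypothetical values): searching for 'c' in "abcab" starting at 0
// scans 'a', 'b', 'c' and leaves counter == 2 chars unscanned, so the result is
// string_length - counter - 1 == 5 - 2 - 1 == 2, the index of the match.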
1595
1596 NearLabel done;
1597 __ jmp(&done);
1598
1599 // Failed to match; return -1.
1600 __ Bind(&not_found_label);
1601 __ movl(out, Immediate(-1));
1602
1603 // And join up at the end.
1604 __ Bind(&done);
1605 if (slow_path != nullptr) {
1606 __ Bind(slow_path->GetExitLabel());
1607 }
1608 }
1609
VisitStringIndexOf(HInvoke * invoke)1610 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1611 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1612 }
1613
VisitStringIndexOf(HInvoke * invoke)1614 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1615 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1616 }
1617
VisitStringIndexOfAfter(HInvoke * invoke)1618 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1619 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1620 }
1621
VisitStringIndexOfAfter(HInvoke * invoke)1622 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1623 GenerateStringIndexOf(
1624 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1625 }
1626
VisitStringNewStringFromBytes(HInvoke * invoke)1627 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1628 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1629 LocationSummary::kCall,
1630 kIntrinsified);
1631 InvokeRuntimeCallingConvention calling_convention;
1632 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1633 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1634 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1635 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1636 locations->SetOut(Location::RegisterLocation(RAX));
1637 }
1638
VisitStringNewStringFromBytes(HInvoke * invoke)1639 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1640 X86_64Assembler* assembler = GetAssembler();
1641 LocationSummary* locations = invoke->GetLocations();
1642
1643 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1644 __ testl(byte_array, byte_array);
1645 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1646 codegen_->AddSlowPath(slow_path);
1647 __ j(kEqual, slow_path->GetEntryLabel());
1648
1649 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
1650 /* no_rip */ true));
1651 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1652 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1653 __ Bind(slow_path->GetExitLabel());
1654 }
1655
VisitStringNewStringFromChars(HInvoke * invoke)1656 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1657 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1658 LocationSummary::kCall,
1659 kIntrinsified);
1660 InvokeRuntimeCallingConvention calling_convention;
1661 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1662 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1663 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1664 locations->SetOut(Location::RegisterLocation(RAX));
1665 }
1666
VisitStringNewStringFromChars(HInvoke * invoke)1667 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1668 X86_64Assembler* assembler = GetAssembler();
1669
1670 // No need to emit code checking whether `locations->InAt(2)` is a null
1671 // pointer, as callers of the native method
1672 //
1673 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1674 //
1675 // all include a null check on `data` before calling that method.
1676 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
1677 /* no_rip */ true));
1678 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1679 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1680 }
1681
VisitStringNewStringFromString(HInvoke * invoke)1682 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1683 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1684 LocationSummary::kCall,
1685 kIntrinsified);
1686 InvokeRuntimeCallingConvention calling_convention;
1687 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1688 locations->SetOut(Location::RegisterLocation(RAX));
1689 }
1690
VisitStringNewStringFromString(HInvoke * invoke)1691 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1692 X86_64Assembler* assembler = GetAssembler();
1693 LocationSummary* locations = invoke->GetLocations();
1694
1695 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1696 __ testl(string_to_copy, string_to_copy);
1697 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1698 codegen_->AddSlowPath(slow_path);
1699 __ j(kEqual, slow_path->GetEntryLabel());
1700
1701 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
1702 /* no_rip */ true));
1703 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1704 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1705 __ Bind(slow_path->GetExitLabel());
1706 }
1707
VisitStringGetCharsNoCheck(HInvoke * invoke)1708 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1709 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1710 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1711 LocationSummary::kNoCall,
1712 kIntrinsified);
1713 locations->SetInAt(0, Location::RequiresRegister());
1714 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1715 locations->SetInAt(2, Location::RequiresRegister());
1716 locations->SetInAt(3, Location::RequiresRegister());
1717 locations->SetInAt(4, Location::RequiresRegister());
1718
1719 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1720 locations->AddTemp(Location::RegisterLocation(RSI));
1721 locations->AddTemp(Location::RegisterLocation(RDI));
1722 locations->AddTemp(Location::RegisterLocation(RCX));
1723 }
1724
VisitStringGetCharsNoCheck(HInvoke * invoke)1725 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1726 X86_64Assembler* assembler = GetAssembler();
1727 LocationSummary* locations = invoke->GetLocations();
1728
1729 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1730 // Location of data in char array buffer.
1731 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1732 // Location of char array data in string.
1733 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1734
1735 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1736 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1737 Location srcBegin = locations->InAt(1);
1738 int srcBegin_value =
1739 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1740 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1741 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1742 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1743
1744 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1745 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1746 DCHECK_EQ(char_size, 2u);
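// Note (added for clarity): rep movsw copies RCX 16-bit words from [RSI] to [RDI],
// incrementing both pointers as it goes, so the code below only needs to set up those
// three registers before issuing the instruction.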
1747
1748 // Compute the address of the destination buffer.
1749 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1750
1751 // Compute the address of the source string.
1752 if (srcBegin.IsConstant()) {
1753 // Compute the address of the source string by adding the number of chars from
1754 // the source beginning to the value offset of a string.
1755 __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset));
1756 } else {
1757 __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(),
1758 ScaleFactor::TIMES_2, value_offset));
1759 }
1760
1761 // Compute the number of chars (words) to move.
1762 __ movl(CpuRegister(RCX), srcEnd);
1763 if (srcBegin.IsConstant()) {
1764 if (srcBegin_value != 0) {
1765 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1766 }
1767 } else {
1768 DCHECK(srcBegin.IsRegister());
1769 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1770 }
1771
1772 // Do the move.
1773 __ rep_movsw();
1774 }
1775
GenPeek(LocationSummary * locations,Primitive::Type size,X86_64Assembler * assembler)1776 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1777 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1778 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1779 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1780 // to avoid a SIGBUS.
1781 switch (size) {
1782 case Primitive::kPrimByte:
1783 __ movsxb(out, Address(address, 0));
1784 break;
1785 case Primitive::kPrimShort:
1786 __ movsxw(out, Address(address, 0));
1787 break;
1788 case Primitive::kPrimInt:
1789 __ movl(out, Address(address, 0));
1790 break;
1791 case Primitive::kPrimLong:
1792 __ movq(out, Address(address, 0));
1793 break;
1794 default:
1795 LOG(FATAL) << "Type not recognized for peek: " << size;
1796 UNREACHABLE();
1797 }
1798 }
1799
VisitMemoryPeekByte(HInvoke * invoke)1800 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1801 CreateIntToIntLocations(arena_, invoke);
1802 }
1803
VisitMemoryPeekByte(HInvoke * invoke)1804 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1805 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1806 }
1807
VisitMemoryPeekIntNative(HInvoke * invoke)1808 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1809 CreateIntToIntLocations(arena_, invoke);
1810 }
1811
VisitMemoryPeekIntNative(HInvoke * invoke)1812 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1813 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1814 }
1815
VisitMemoryPeekLongNative(HInvoke * invoke)1816 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1817 CreateIntToIntLocations(arena_, invoke);
1818 }
1819
VisitMemoryPeekLongNative(HInvoke * invoke)1820 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1821 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1822 }
1823
VisitMemoryPeekShortNative(HInvoke * invoke)1824 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1825 CreateIntToIntLocations(arena_, invoke);
1826 }
1827
VisitMemoryPeekShortNative(HInvoke * invoke)1828 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1829 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1830 }
1831
CreateIntIntToVoidLocations(ArenaAllocator * arena,HInvoke * invoke)1832 static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
1833 LocationSummary* locations = new (arena) LocationSummary(invoke,
1834 LocationSummary::kNoCall,
1835 kIntrinsified);
1836 locations->SetInAt(0, Location::RequiresRegister());
1837 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1838 }
1839
GenPoke(LocationSummary * locations,Primitive::Type size,X86_64Assembler * assembler)1840 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1841 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1842 Location value = locations->InAt(1);
1843 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1844 // to avoid a SIGBUS.
1845 switch (size) {
1846 case Primitive::kPrimByte:
1847 if (value.IsConstant()) {
1848 __ movb(Address(address, 0),
1849 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1850 } else {
1851 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1852 }
1853 break;
1854 case Primitive::kPrimShort:
1855 if (value.IsConstant()) {
1856 __ movw(Address(address, 0),
1857 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1858 } else {
1859 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1860 }
1861 break;
1862 case Primitive::kPrimInt:
1863 if (value.IsConstant()) {
1864 __ movl(Address(address, 0),
1865 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1866 } else {
1867 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1868 }
1869 break;
1870 case Primitive::kPrimLong:
1871 if (value.IsConstant()) {
1872 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1873 DCHECK(IsInt<32>(v));
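// Note (added for clarity): a 64-bit store can only encode a sign-extended 32-bit
// immediate, so larger constants would have to go through a register first; the DCHECK
// above asserts that this case does not occur here.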
1874 int32_t v_32 = v;
1875 __ movq(Address(address, 0), Immediate(v_32));
1876 } else {
1877 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1878 }
1879 break;
1880 default:
1881 LOG(FATAL) << "Type not recognized for poke: " << size;
1882 UNREACHABLE();
1883 }
1884 }
1885
VisitMemoryPokeByte(HInvoke * invoke)1886 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1887 CreateIntIntToVoidLocations(arena_, invoke);
1888 }
1889
VisitMemoryPokeByte(HInvoke * invoke)1890 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1891 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1892 }
1893
VisitMemoryPokeIntNative(HInvoke * invoke)1894 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1895 CreateIntIntToVoidLocations(arena_, invoke);
1896 }
1897
VisitMemoryPokeIntNative(HInvoke * invoke)1898 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1899 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1900 }
1901
VisitMemoryPokeLongNative(HInvoke * invoke)1902 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1903 CreateIntIntToVoidLocations(arena_, invoke);
1904 }
1905
VisitMemoryPokeLongNative(HInvoke * invoke)1906 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1907 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1908 }
1909
VisitMemoryPokeShortNative(HInvoke * invoke)1910 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1911 CreateIntIntToVoidLocations(arena_, invoke);
1912 }
1913
VisitMemoryPokeShortNative(HInvoke * invoke)1914 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1915 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1916 }
1917
VisitThreadCurrentThread(HInvoke * invoke)1918 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1919 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1920 LocationSummary::kNoCall,
1921 kIntrinsified);
1922 locations->SetOut(Location::RequiresRegister());
1923 }
1924
VisitThreadCurrentThread(HInvoke * invoke)1925 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1926 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1927 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
1928 /* no_rip */ true));
1929 }
1930
GenUnsafeGet(HInvoke * invoke,Primitive::Type type,bool is_volatile ATTRIBUTE_UNUSED,CodeGeneratorX86_64 * codegen)1931 static void GenUnsafeGet(HInvoke* invoke,
1932 Primitive::Type type,
1933 bool is_volatile ATTRIBUTE_UNUSED,
1934 CodeGeneratorX86_64* codegen) {
1935 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1936 LocationSummary* locations = invoke->GetLocations();
1937 Location base_loc = locations->InAt(1);
1938 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1939 Location offset_loc = locations->InAt(2);
1940 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1941 Location output_loc = locations->Out();
1942 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1943
1944 switch (type) {
1945 case Primitive::kPrimInt:
1946 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1947 break;
1948
1949 case Primitive::kPrimNot: {
1950 if (kEmitCompilerReadBarrier) {
1951 if (kUseBakerReadBarrier) {
1952 Location temp = locations->GetTemp(0);
1953 codegen->GenerateArrayLoadWithBakerReadBarrier(
1954 invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
1955 } else {
1956 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1957 codegen->GenerateReadBarrierSlow(
1958 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1959 }
1960 } else {
1961 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1962 __ MaybeUnpoisonHeapReference(output);
1963 }
1964 break;
1965 }
1966
1967 case Primitive::kPrimLong:
1968 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1969 break;
1970
1971 default:
1972 LOG(FATAL) << "Unsupported op size " << type;
1973 UNREACHABLE();
1974 }
1975 }
1976
CreateIntIntIntToIntLocations(ArenaAllocator * arena,HInvoke * invoke,Primitive::Type type)1977 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1978 HInvoke* invoke,
1979 Primitive::Type type) {
1980 bool can_call = kEmitCompilerReadBarrier &&
1981 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1982 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1983 LocationSummary* locations = new (arena) LocationSummary(invoke,
1984 can_call ?
1985 LocationSummary::kCallOnSlowPath :
1986 LocationSummary::kNoCall,
1987 kIntrinsified);
1988 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1989 locations->SetInAt(1, Location::RequiresRegister());
1990 locations->SetInAt(2, Location::RequiresRegister());
1991 locations->SetOut(Location::RequiresRegister());
1992 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1993 // We need a temporary register for the read barrier marking slow
1994 // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
1995 locations->AddTemp(Location::RequiresRegister());
1996 }
1997 }
1998
VisitUnsafeGet(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
2000 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
2001 }
VisitUnsafeGetVolatile(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2003 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
2004 }
VisitUnsafeGetLong(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2006 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
2007 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2009 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
2010 }
VisitUnsafeGetObject(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2012 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
2013 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2015 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
2016 }
2017
2018
VisitUnsafeGet(HInvoke * invoke)2019 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2020 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2021 }
VisitUnsafeGetVolatile(HInvoke * invoke)2022 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2023 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2024 }
VisitUnsafeGetLong(HInvoke * invoke)2025 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2026 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2027 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2028 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2029 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2030 }
VisitUnsafeGetObject(HInvoke * invoke)2031 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2032 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2033 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2034 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2035 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2036 }
2037
2038
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke)2039 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2040 Primitive::Type type,
2041 HInvoke* invoke) {
2042 LocationSummary* locations = new (arena) LocationSummary(invoke,
2043 LocationSummary::kNoCall,
2044 kIntrinsified);
2045 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2046 locations->SetInAt(1, Location::RequiresRegister());
2047 locations->SetInAt(2, Location::RequiresRegister());
2048 locations->SetInAt(3, Location::RequiresRegister());
2049 if (type == Primitive::kPrimNot) {
2050 // Need temp registers for card-marking.
2051 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2052 locations->AddTemp(Location::RequiresRegister());
2053 }
2054 }
2055
VisitUnsafePut(HInvoke * invoke)2056 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2057 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2058 }
VisitUnsafePutOrdered(HInvoke * invoke)2059 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2060 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2061 }
VisitUnsafePutVolatile(HInvoke * invoke)2062 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2063 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
2064 }
VisitUnsafePutObject(HInvoke * invoke)2065 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2066 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2067 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2068 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2069 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2070 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2071 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2072 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
2073 }
VisitUnsafePutLong(HInvoke * invoke)2074 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2075 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2076 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2077 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2078 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2079 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2080 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2081 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
2082 }
2083
2084 // Ordered writes need no special handling here: they only require an AnyStore barrier, which the
2085 // x86-64 memory model already provides.
GenUnsafePut(LocationSummary * locations,Primitive::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2086 static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
2087 CodeGeneratorX86_64* codegen) {
2088 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2089 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2090 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2091 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2092
2093 if (type == Primitive::kPrimLong) {
2094 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2095 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2096 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2097 __ movl(temp, value);
2098 __ PoisonHeapReference(temp);
2099 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2100 } else {
2101 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2102 }
2103
2104 if (is_volatile) {
2105 codegen->MemoryFence();
2106 }
2107
2108 if (type == Primitive::kPrimNot) {
2109 bool value_can_be_null = true; // TODO: Worth finding out this information?
2110 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2111 locations->GetTemp(1).AsRegister<CpuRegister>(),
2112 base,
2113 value,
2114 value_can_be_null);
2115 }
2116 }
2117
VisitUnsafePut(HInvoke * invoke)2118 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2119 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2120 }
VisitUnsafePutOrdered(HInvoke * invoke)2121 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
2122 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2123 }
VisitUnsafePutVolatile(HInvoke * invoke)2124 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2125 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2126 }
VisitUnsafePutObject(HInvoke * invoke)2127 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2128 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2129 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2130 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2131 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2132 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2133 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2134 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2135 }
VisitUnsafePutLong(HInvoke * invoke)2136 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2137 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2138 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2139 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2140 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2141 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2142 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2143 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2144 }
2145
CreateIntIntIntIntIntToInt(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke)2146 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
2147 HInvoke* invoke) {
2148 LocationSummary* locations = new (arena) LocationSummary(invoke,
2149 LocationSummary::kNoCall,
2150 kIntrinsified);
2151 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2152 locations->SetInAt(1, Location::RequiresRegister());
2153 locations->SetInAt(2, Location::RequiresRegister());
2154 // The expected value must be in RAX, as required by the CMPXCHG instruction.
2155 locations->SetInAt(3, Location::RegisterLocation(RAX));
2156 locations->SetInAt(4, Location::RequiresRegister());
2157
2158 locations->SetOut(Location::RequiresRegister());
2159 if (type == Primitive::kPrimNot) {
2160 // Need temp registers for card-marking.
2161 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2162 locations->AddTemp(Location::RequiresRegister());
2163 }
2164 }
2165
VisitUnsafeCASInt(HInvoke * invoke)2166 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2167 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2168 }
2169
VisitUnsafeCASLong(HInvoke * invoke)2170 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2171 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2172 }
2173
VisitUnsafeCASObject(HInvoke * invoke)2174 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2175 // The UnsafeCASObject intrinsic is missing a read barrier, and
2176 // therefore sometimes does not work as expected (b/25883050).
2177 // Turn it off temporarily as a quick fix, until the read barrier is
2178 // implemented.
2179 //
2180 // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
2181 // this intrinsic.
2182 if (kEmitCompilerReadBarrier) {
2183 return;
2184 }
2185
2186 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2187 }
2188
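// A rough sketch of what GenCAS below emits, ignoring heap poisoning and card marking
// (illustrative only; `addr` stands for base + offset):
//
//   lock cmpxchg performs atomically:
//     if (*addr == RAX /* expected */) { *addr = value; ZF = 1; }
//     else                             { RAX = *addr;   ZF = 0; }
//   out = ZF;  // the boolean result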
GenCAS(Primitive::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2189 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2190 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2191 LocationSummary* locations = invoke->GetLocations();
2192
2193 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2194 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2195 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
2196 // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
2197 DCHECK_EQ(expected.AsRegister(), RAX);
2198 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2199 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2200
2201 if (type == Primitive::kPrimNot) {
2202 // Mark card for object assuming new value is stored.
2203 bool value_can_be_null = true; // TODO: Worth finding out this information?
2204 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2205 locations->GetTemp(1).AsRegister<CpuRegister>(),
2206 base,
2207 value,
2208 value_can_be_null);
2209
2210 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2211 Register value_reg = value.AsRegister();
2212 if (kPoisonHeapReferences) {
2213 if (base_equals_value) {
2214 // If `base` and `value` are the same register location, move
2215 // `value_reg` to a temporary register. This way, poisoning
2216 // `value_reg` won't invalidate `base`.
2217 value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
2218 __ movl(CpuRegister(value_reg), base);
2219 }
2220
2221 // Check that the register allocator did not assign the location
2222 // of `expected` (RAX) to `value` nor to `base`, so that heap
2223 // poisoning (when enabled) works as intended below.
2224 // - If `value` were equal to `expected`, both references would
2225 // be poisoned twice, meaning they would not be poisoned at
2226 // all, as heap poisoning uses address negation.
2227 // - If `base` were equal to `expected`, poisoning `expected`
2228 // would invalidate `base`.
2229 DCHECK_NE(value_reg, expected.AsRegister());
2230 DCHECK_NE(base.AsRegister(), expected.AsRegister());
2231
2232 __ PoisonHeapReference(expected);
2233 __ PoisonHeapReference(CpuRegister(value_reg));
2234 }
2235
2236 // TODO: Add a read barrier for the reference stored in the object
2237 // before attempting the CAS, similar to the one in the
2238 // art::Unsafe_compareAndSwapObject JNI implementation.
2239 //
2240 // Note that this code is not (yet) used when read barriers are
2241 // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
2242 DCHECK(!kEmitCompilerReadBarrier);
2243 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
2244
2245 // LOCK CMPXCHG has full barrier semantics, and we don't need
2246 // scheduling barriers at this time.
2247
2248 // Convert ZF into the boolean result.
2249 __ setcc(kZero, out);
2250 __ movzxb(out, out);
2251
2252 // If heap poisoning is enabled, we need to unpoison the values
2253 // that were poisoned earlier.
2254 if (kPoisonHeapReferences) {
2255 if (base_equals_value) {
2256 // `value_reg` has been moved to a temporary register, no need
2257 // to unpoison it.
2258 } else {
2259 // Ensure `value` is different from `out`, so that unpoisoning
2260 // the former does not invalidate the latter.
2261 DCHECK_NE(value_reg, out.AsRegister());
2262 __ UnpoisonHeapReference(CpuRegister(value_reg));
2263 }
2264 // Ensure `expected` is different from `out`, so that unpoisoning
2265 // the former does not invalidate the latter.
2266 DCHECK_NE(expected.AsRegister(), out.AsRegister());
2267 __ UnpoisonHeapReference(expected);
2268 }
2269 } else {
2270 if (type == Primitive::kPrimInt) {
2271 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2272 } else if (type == Primitive::kPrimLong) {
2273 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
2274 } else {
2275 LOG(FATAL) << "Unexpected CAS type " << type;
2276 }
2277
2278 // LOCK CMPXCHG has full barrier semantics, and we don't need
2279 // scheduling barriers at this time.
2280
2281 // Convert ZF into the boolean result.
2282 __ setcc(kZero, out);
2283 __ movzxb(out, out);
2284 }
2285 }
2286
VisitUnsafeCASInt(HInvoke * invoke)2287 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2288 GenCAS(Primitive::kPrimInt, invoke, codegen_);
2289 }
2290
VisitUnsafeCASLong(HInvoke * invoke)2291 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2292 GenCAS(Primitive::kPrimLong, invoke, codegen_);
2293 }
2294
VisitUnsafeCASObject(HInvoke * invoke)2295 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2296 GenCAS(Primitive::kPrimNot, invoke, codegen_);
2297 }
2298
VisitIntegerReverse(HInvoke * invoke)2299 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2300 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2301 LocationSummary::kNoCall,
2302 kIntrinsified);
2303 locations->SetInAt(0, Location::RequiresRegister());
2304 locations->SetOut(Location::SameAsFirstInput());
2305 locations->AddTemp(Location::RequiresRegister());
2306 }
2307
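// SwapBits computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. it
// exchanges each group of bits selected by `mask` with the group `shift` positions above
// it. Illustrative example: shift = 1 with mask = 0x55555555 swaps every even-numbered
// bit with the odd-numbered bit next to it.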
SwapBits(CpuRegister reg,CpuRegister temp,int32_t shift,int32_t mask,X86_64Assembler * assembler)2308 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2309 X86_64Assembler* assembler) {
2310 Immediate imm_shift(shift);
2311 Immediate imm_mask(mask);
2312 __ movl(temp, reg);
2313 __ shrl(reg, imm_shift);
2314 __ andl(temp, imm_mask);
2315 __ andl(reg, imm_mask);
2316 __ shll(temp, imm_shift);
2317 __ orl(reg, temp);
2318 }
2319
VisitIntegerReverse(HInvoke * invoke)2320 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2321 X86_64Assembler* assembler = GetAssembler();
2322 LocationSummary* locations = invoke->GetLocations();
2323
2324 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2325 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2326
2327 /*
2328 * Use one bswap instruction to reverse the byte order first, and then use 3 rounds of
2329 * bit swapping to reverse the bits within each byte of x. Using bswap saves instructions
2330 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2331 * x = bswap x
2332 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2333 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2334 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2335 */
2336 __ bswapl(reg);
2337 SwapBits(reg, temp, 1, 0x55555555, assembler);
2338 SwapBits(reg, temp, 2, 0x33333333, assembler);
2339 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2340 }
2341
VisitLongReverse(HInvoke * invoke)2342 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2343 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2344 LocationSummary::kNoCall,
2345 kIntrinsified);
2346 locations->SetInAt(0, Location::RequiresRegister());
2347 locations->SetOut(Location::SameAsFirstInput());
2348 locations->AddTemp(Location::RequiresRegister());
2349 locations->AddTemp(Location::RequiresRegister());
2350 }
2351
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2352 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2353 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2354 Immediate imm_shift(shift);
2355 __ movq(temp_mask, Immediate(mask));
2356 __ movq(temp, reg);
2357 __ shrq(reg, imm_shift);
2358 __ andq(temp, temp_mask);
2359 __ andq(reg, temp_mask);
2360 __ shlq(temp, imm_shift);
2361 __ orq(reg, temp);
2362 }
2363
VisitLongReverse(HInvoke * invoke)2364 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2365 X86_64Assembler* assembler = GetAssembler();
2366 LocationSummary* locations = invoke->GetLocations();
2367
2368 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2369 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2370 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2371
2372 /*
2373 * Use one bswap instruction to reverse the byte order first, and then use 3 rounds of
2374 * bit swapping to reverse the bits within each byte of the long number x. Using bswap saves
2375 * instructions compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2376 * x = bswap x
2377 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2378 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2379 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2380 */
2381 __ bswapq(reg);
2382 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2383 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2384 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2385 }
2386
CreateBitCountLocations(ArenaAllocator * arena,CodeGeneratorX86_64 * codegen,HInvoke * invoke)2387 static void CreateBitCountLocations(
2388 ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2389 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2390 // Do nothing if there is no popcnt support. This results in generating
2391 // a call for the intrinsic rather than direct code.
2392 return;
2393 }
2394 LocationSummary* locations = new (arena) LocationSummary(invoke,
2395 LocationSummary::kNoCall,
2396 kIntrinsified);
2397 locations->SetInAt(0, Location::Any());
2398 locations->SetOut(Location::RequiresRegister());
2399 }
2400
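// GenBitCount implements {Integer,Long}.bitCount. Illustrative example (not from the
// original source): bitCount(0xF1) == 5, evaluated at compile time via POPCOUNT for
// constant inputs and with a single popcntl/popcntq instruction otherwise.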
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)2401 static void GenBitCount(X86_64Assembler* assembler,
2402 CodeGeneratorX86_64* codegen,
2403 HInvoke* invoke,
2404 bool is_long) {
2405 LocationSummary* locations = invoke->GetLocations();
2406 Location src = locations->InAt(0);
2407 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2408
2409 if (invoke->InputAt(0)->IsConstant()) {
2410 // Evaluate this at compile time.
2411 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2412 int32_t result = is_long
2413 ? POPCOUNT(static_cast<uint64_t>(value))
2414 : POPCOUNT(static_cast<uint32_t>(value));
2415 codegen->Load32BitValue(out, result);
2416 return;
2417 }
2418
2419 if (src.IsRegister()) {
2420 if (is_long) {
2421 __ popcntq(out, src.AsRegister<CpuRegister>());
2422 } else {
2423 __ popcntl(out, src.AsRegister<CpuRegister>());
2424 }
2425 } else if (is_long) {
2426 DCHECK(src.IsDoubleStackSlot());
2427 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2428 } else {
2429 DCHECK(src.IsStackSlot());
2430 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2431 }
2432 }
2433
VisitIntegerBitCount(HInvoke * invoke)2434 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2435 CreateBitCountLocations(arena_, codegen_, invoke);
2436 }
2437
VisitIntegerBitCount(HInvoke * invoke)2438 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2439 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
2440 }
2441
VisitLongBitCount(HInvoke * invoke)2442 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2443 CreateBitCountLocations(arena_, codegen_, invoke);
2444 }
2445
VisitLongBitCount(HInvoke * invoke)2446 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2447 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
2448 }
2449
CreateOneBitLocations(ArenaAllocator * arena,HInvoke * invoke,bool is_high)2450 static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
2451 LocationSummary* locations = new (arena) LocationSummary(invoke,
2452 LocationSummary::kNoCall,
2453 kIntrinsified);
2454 locations->SetInAt(0, Location::Any());
2455 locations->SetOut(Location::RequiresRegister());
2456 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
2457 : Location::RequiresRegister()); // any will do
2458 }
2459
GenOneBit(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_high,bool is_long)2460 static void GenOneBit(X86_64Assembler* assembler,
2461 CodeGeneratorX86_64* codegen,
2462 HInvoke* invoke,
2463 bool is_high, bool is_long) {
2464 LocationSummary* locations = invoke->GetLocations();
2465 Location src = locations->InAt(0);
2466 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2467
2468 if (invoke->InputAt(0)->IsConstant()) {
2469 // Evaluate this at compile time.
2470 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2471 if (value == 0) {
2472 __ xorl(out, out); // Clears upper bits too.
2473 return;
2474 }
2475 // Nonzero value.
2476 if (is_high) {
2477 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2478 : 31 - CLZ(static_cast<uint32_t>(value));
2479 } else {
2480 value = is_long ? CTZ(static_cast<uint64_t>(value))
2481 : CTZ(static_cast<uint32_t>(value));
2482 }
2483 if (is_long) {
2484 codegen->Load64BitValue(out, 1L << value);
2485 } else {
2486 codegen->Load32BitValue(out, 1 << value);
2487 }
2488 return;
2489 }
2490
2491 // Handle the non-constant cases.
2492 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2493 if (is_high) {
2494 // Use architectural support: basically 1 << bsr.
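// Illustrative example: for x = 0b0110100, bsr yields 5, so the result is
// 1 << 5 = 0b0100000, the highest one bit. The zero case is handled separately below,
// since bsr leaves its destination undefined for a zero input.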
2495 if (src.IsRegister()) {
2496 if (is_long) {
2497 __ bsrq(tmp, src.AsRegister<CpuRegister>());
2498 } else {
2499 __ bsrl(tmp, src.AsRegister<CpuRegister>());
2500 }
2501 } else if (is_long) {
2502 DCHECK(src.IsDoubleStackSlot());
2503 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2504 } else {
2505 DCHECK(src.IsStackSlot());
2506 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2507 }
2508 // BSR sets ZF if the input was zero.
2509 NearLabel is_zero, done;
2510 __ j(kEqual, &is_zero);
2511 __ movl(out, Immediate(1)); // Clears upper bits too.
2512 if (is_long) {
2513 __ shlq(out, tmp);
2514 } else {
2515 __ shll(out, tmp);
2516 }
2517 __ jmp(&done);
2518 __ Bind(&is_zero);
2519 __ xorl(out, out); // Clears upper bits too.
2520 __ Bind(&done);
2521 } else {
2522 // Copy input into temporary.
2523 if (src.IsRegister()) {
2524 if (is_long) {
2525 __ movq(tmp, src.AsRegister<CpuRegister>());
2526 } else {
2527 __ movl(tmp, src.AsRegister<CpuRegister>());
2528 }
2529 } else if (is_long) {
2530 DCHECK(src.IsDoubleStackSlot());
2531 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2532 } else {
2533 DCHECK(src.IsStackSlot());
2534 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2535 }
2536 // Do the bit twiddling: basically tmp & -tmp;
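// Illustrative example: for x = 0b0110100, -x is ...11001100 in two's complement, so
// x & -x = 0b0000100, which isolates the lowest one bit.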
2537 if (is_long) {
2538 __ movq(out, tmp);
2539 __ negq(tmp);
2540 __ andq(out, tmp);
2541 } else {
2542 __ movl(out, tmp);
2543 __ negl(tmp);
2544 __ andl(out, tmp);
2545 }
2546 }
2547 }
2548
VisitIntegerHighestOneBit(HInvoke * invoke)2549 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2550 CreateOneBitLocations(arena_, invoke, /* is_high */ true);
2551 }
2552
VisitIntegerHighestOneBit(HInvoke * invoke)2553 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2554 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
2555 }
2556
VisitLongHighestOneBit(HInvoke * invoke)2557 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2558 CreateOneBitLocations(arena_, invoke, /* is_high */ true);
2559 }
2560
VisitLongHighestOneBit(HInvoke * invoke)2561 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2562 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
2563 }
2564
VisitIntegerLowestOneBit(HInvoke * invoke)2565 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2566 CreateOneBitLocations(arena_, invoke, /* is_high */ false);
2567 }
2568
VisitIntegerLowestOneBit(HInvoke * invoke)2569 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2570 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
2571 }
2572
VisitLongLowestOneBit(HInvoke * invoke)2573 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2574 CreateOneBitLocations(arena_, invoke, /* is_high */ false);
2575 }
2576
VisitLongLowestOneBit(HInvoke * invoke)2577 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2578 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
2579 }
2580
CreateLeadingZeroLocations(ArenaAllocator * arena,HInvoke * invoke)2581 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
2582 LocationSummary* locations = new (arena) LocationSummary(invoke,
2583 LocationSummary::kNoCall,
2584 kIntrinsified);
2585 locations->SetInAt(0, Location::Any());
2586 locations->SetOut(Location::RequiresRegister());
2587 }

static void GenLeadingZeros(X86_64Assembler* assembler,
                            CodeGeneratorX86_64* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsrq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsrl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSR sets ZF if the input was zero, and the output is undefined.
  NearLabel is_zero, done;
  __ j(kEqual, &is_zero);

  // Correct the result from BSR to get the CLZ result.
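  // For a nonzero input, BSR leaves the index i of the highest set bit in 'out'
  // (0 <= i <= width - 1), and CLZ(x) == (width - 1) - i. Since width - 1 is all
  // ones (31 or 63), that subtraction is the same as the XOR below.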
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
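
// A minimal reference sketch (illustrative only, assuming the GCC/Clang
// __builtin_clzll builtin) of the Integer/Long.numberOfLeadingZeros value the
// code above produces:
//
//   int NumberOfLeadingZeros64(uint64_t x) {
//     return (x == 0) ? 64 : __builtin_clzll(x);  // Builtin is undefined for 0.
//   }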

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
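  // Unlike BSR, the BSF result needs no correction: for a nonzero input the
  // index of the lowest set bit equals the number of trailing zeros, so only
  // the zero case has to be patched below.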
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}
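
// A minimal reference sketch (illustrative only, assuming the GCC/Clang
// __builtin_ctzll builtin) of the Integer/Long.numberOfTrailingZeros value the
// code above produces:
//
//   int NumberOfTrailingZeros64(uint64_t x) {
//     return (x == 0) ? 64 : __builtin_ctzll(x);  // Builtin is undefined for 0.
//   }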

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)

// 1.8 (Java 8) intrinsics.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art