/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::StackOperandFrom;
using helpers::VRegisterFrom;
using helpers::XRegisterFrom;

#define __ GetVIXLAssembler()->

// Returns whether the value of the constant can be directly encoded into the instruction as an
// immediate.
inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  // TODO: Improve this when the IsSIMDConstantEncodable method is implemented in VIXL.
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
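    // Conservative check: an 8-bit unsigned value can always be encoded in the
    // immediate field of MOVI (vector, immediate).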
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsUint<8>(value);
  }
  return false;
}

// Returns
// - constant location - if 'constant' is an actual constant and its value can be
//   encoded into the instruction.
// - register location otherwise.
inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant()
      && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant->AsConstant());
  }

  return Location::RequiresRegister();
}

// Returns whether dot product instructions should be emitted.
static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
  return codegen_->GetInstructionSetFeatures().HasDotProd();
}

void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, NEONEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
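          // ADDV has no doubleword-lane form; scalar ADDP sums the two 64-bit lanes.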
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
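      // Boolean lanes hold 0 or 1, so boolean-not is computed as x ^ 1 rather
      // than a full bitwise NOT.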
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
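  // The shift distance is always a compile-time constant here;
  // CreateVecShiftLocations requires a constant location for input 1.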
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector multiply-accumulate instructions are not affected.
void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}

void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          DCHECK_EQ(8u, instruction->GetVectorLength());
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
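          // Sign-extend the low and high byte halves to halfwords, then
          // accumulate absolute differences into the word lanes.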
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
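          // Widen bytes to halfwords and then to words, accumulating absolute
          // differences into the doubleword lanes one quarter of the vector at a time.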
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          DCHECK_EQ(4u, instruction->GetVectorLength());
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          DCHECK_EQ(2u, instruction->GetVectorLength());
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  // For Int8 and Uint8 general case we need a temp register.
  if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
      !ShouldEmitDotProductInstructions(codegen_)) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
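          // No UDOT available: emulate the dot product with widening multiplies
          // (8-bit x 8-bit -> 16-bit products), then widening-accumulate the
          // products into the 32-bit lanes. The signed path below does the same.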
1356 __ Umull(tmp.V8H(), left.V8B(), right.V8B());
1357 __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
1358 __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1359
1360 __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
1361 __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
1362 __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1363 }
1364 } else {
1365 if (ShouldEmitDotProductInstructions(codegen_)) {
1366 __ Sdot(acc.V4S(), left.V16B(), right.V16B());
1367 } else {
1368 VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1369 __ Smull(tmp.V8H(), left.V8B(), right.V8B());
1370 __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
1371 __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1372
1373 __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
1374 __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
1375 __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1376 }
1377 }
1378 break;
1379 }
1380 case 2u:
1381 DCHECK_EQ(8u, a->GetVectorLength());
1382 if (instruction->IsZeroExtending()) {
1383 __ Umlal(acc.V4S(), left.V4H(), right.V4H());
1384 __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
1385 } else {
1386 __ Smlal(acc.V4S(), left.V4H(), right.V4H());
1387 __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
1388 }
1389 break;
1390 default:
1391 LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
1392 }
1393 }
1394
1395 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1396 static void CreateVecMemLocations(ArenaAllocator* allocator,
1397 HVecMemoryOperation* instruction,
1398 bool is_load) {
1399 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1400 switch (instruction->GetPackedType()) {
1401 case DataType::Type::kBool:
1402 case DataType::Type::kUint8:
1403 case DataType::Type::kInt8:
1404 case DataType::Type::kUint16:
1405 case DataType::Type::kInt16:
1406 case DataType::Type::kInt32:
1407 case DataType::Type::kInt64:
1408 case DataType::Type::kFloat32:
1409 case DataType::Type::kFloat64:
1410 locations->SetInAt(0, Location::RequiresRegister());
1411 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
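        // String.count packs the character count together with the compression
        // flag in its least significant bit, so a single TBNZ on bit 0 selects
        // the 8-bit (compressed) or 16-bit (uncompressed) load path below.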
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNEONAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNEONAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg,
             VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll*) {
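  // NEON has no governing predicate registers; every lane is implicitly
  // active, so an all-true predicate requires no code.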
}

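// The remaining predicate operations are only created by predicated (SVE-style)
// vectorization, so reaching them in the NEON backend indicates a compiler bug.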
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
}

void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  scope->Release(QRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
                                                            Location source) {
  DCHECK(destination.IsSIMDStackSlot());
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);

  if (source.IsFpuRegister()) {
    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
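      // No scratch V register is available: copy the 16-byte slot as two
      // 64-bit halves through a scratch X register instead.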
      Register temp = temps.AcquireX();
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
    } else {
      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}

// Compute the memory operands used to save/restore live registers, then emit the accesses.
template <bool is_save>
void SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64* codegen,
                                            LocationSummary* locations,
                                            int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
  DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);

  MacroAssembler* masm = codegen->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
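  // LDP/STP encode a signed, size-scaled 7-bit immediate offset, so when the
  // last pair's offset would not fit, materialize a new base register first.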
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // The offset does not fit in the instruction's immediate field; use a
    // temporary register to hold the base address for the floating point
    // register spills.
    Register new_base = temps.AcquireSameSizeAs(base);
    masm->Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void InstructionCodeGeneratorARM64Neon::SaveLiveRegistersHelper(LocationSummary* locations,
                                                                int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Neon::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                   int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art