• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "base/bit_utils_iterator.h"
21 #include "mirror/array-inl.h"
22 #include "mirror/string.h"
23 
24 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
25 
26 namespace art {
27 namespace arm64 {
28 
29 using helpers::DRegisterFrom;
30 using helpers::HeapOperand;
31 using helpers::InputRegisterAt;
32 using helpers::Int64FromLocation;
33 using helpers::LocationFrom;
34 using helpers::OutputRegister;
35 using helpers::QRegisterFrom;
36 using helpers::StackOperandFrom;
37 using helpers::VRegisterFrom;
38 using helpers::XRegisterFrom;
39 
40 #define __ GetVIXLAssembler()->
41 
// Returns whether the value of the constant can be directly encoded into the instruction as
// immediate.
inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      // 64-bit integral lane constants are never treated as encodable here.
      return false;
    } else if (constant->IsFloatConstant()) {
      // FP32 constant must fit the FMOV (vector, immediate) encoding.
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      // FP64 constant must fit the FMOV (vector, immediate) encoding.
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // Remaining integral constants (bool/8/16/32-bit) are encodable when the
    // value fits in an unsigned 8-bit immediate (used by MOVI below).
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsUint<8>(value);
  }
  // Only VecReplicateScalar consumers are handled; anything else gets a register.
  return false;
}
59 
60 // Returns
61 //  - constant location - if 'constant' is an actual constant and its value can be
62 //    encoded into the instruction.
63 //  - register location otherwise.
NEONEncodableConstantOrRegister(HInstruction * constant,HInstruction * instr)64 inline Location NEONEncodableConstantOrRegister(HInstruction* constant,
65                                                 HInstruction* instr) {
66   if (constant->IsConstant()
67       && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
68     return Location::ConstantLocation(constant->AsConstant());
69   }
70 
71   return Location::RequiresRegister();
72 }
73 
74 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)75 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
76   return codegen_->GetInstructionSetFeatures().HasDotProd();
77 }
78 
// Sets up locations for broadcasting a scalar input into every lane of a SIMD register.
void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral input: either an encodable immediate or a core register.
      locations->SetInAt(0, NEONEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        // FP constant fits the FMOV immediate encoding: no input register needed.
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Emits the broadcast: MOVI/FMOV for encodable constants, DUP for register inputs.
void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());  // 16 x 8-bit lanes.
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());  // 8 x 16-bit lanes.
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());  // 4 x 32-bit lanes.
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());  // 2 x 64-bit lanes.
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        // 64-bit source comes from an X register.
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Replicate lane 0 of the source FP register.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
171 
// Sets up locations for extracting lane 0 of a vector into a scalar value.
void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral extract moves the lane into a core register.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // The FP scalar already lives in lane 0 of the input register, so the
      // output aliases the input and no instruction is emitted (see below).
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Emits the lane-0 extraction (UMOV for integral types; nothing for FP).
void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
219 
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      // Boolean-not writes the output (MOVI) before reading the input (EOR),
      // so input and output must not share a register.
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
246 
void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits an across-lanes reduction; the result lands in the low lane of 'dst'.
void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());  // across-vector add
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());  // signed across-vector min
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());  // signed across-vector max
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // Pairwise add collapses the two 64-bit lanes.
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          // No across-vector min/max for 64-bit lanes here.
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
286 
VisitVecCnv(HVecCnv * instruction)287 void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
288   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
289 }
290 
VisitVecCnv(HVecCnv * instruction)291 void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
292   LocationSummary* locations = instruction->GetLocations();
293   VRegister src = VRegisterFrom(locations->InAt(0));
294   VRegister dst = VRegisterFrom(locations->Out());
295   DataType::Type from = instruction->GetInputType();
296   DataType::Type to = instruction->GetResultType();
297   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
298     DCHECK_EQ(4u, instruction->GetVectorLength());
299     __ Scvtf(dst.V4S(), src.V4S());
300   } else {
301     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
302   }
303 }
304 
void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise negation: NEG for integral lanes, FNEG for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
345 
void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise absolute value: ABS for signed integral lanes, FABS for FP.
// Note: only signed integral types appear — abs of unsigned lanes is meaningless.
void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
384 
void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise logical not.
void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // Booleans are 0/1, so x ^ 1 flips them; dst is written before src is
      // read, which is why the locations builder forces output overlap.
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
412 
// Helper to set up locations for vector binary operations.
// Both operands and the result live in FP/SIMD registers; the output may
// freely alias either input (kNoOutputOverlap).
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
435 
VisitVecAdd(HVecAdd * instruction)436 void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
437   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
438 }
439 
VisitVecAdd(HVecAdd * instruction)440 void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
441   LocationSummary* locations = instruction->GetLocations();
442   VRegister lhs = VRegisterFrom(locations->InAt(0));
443   VRegister rhs = VRegisterFrom(locations->InAt(1));
444   VRegister dst = VRegisterFrom(locations->Out());
445   switch (instruction->GetPackedType()) {
446     case DataType::Type::kUint8:
447     case DataType::Type::kInt8:
448       DCHECK_EQ(16u, instruction->GetVectorLength());
449       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
450       break;
451     case DataType::Type::kUint16:
452     case DataType::Type::kInt16:
453       DCHECK_EQ(8u, instruction->GetVectorLength());
454       __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
455       break;
456     case DataType::Type::kInt32:
457       DCHECK_EQ(4u, instruction->GetVectorLength());
458       __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
459       break;
460     case DataType::Type::kInt64:
461       DCHECK_EQ(2u, instruction->GetVectorLength());
462       __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
463       break;
464     case DataType::Type::kFloat32:
465       DCHECK_EQ(4u, instruction->GetVectorLength());
466       __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
467       break;
468     case DataType::Type::kFloat64:
469       DCHECK_EQ(2u, instruction->GetVectorLength());
470       __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
471       break;
472     default:
473       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
474       UNREACHABLE();
475   }
476 }
477 
void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise saturating addition: UQADD for unsigned lanes, SQADD for
// signed lanes. Only 8- and 16-bit lane types are supported.
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
509 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)510 void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
511   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
512 }
513 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)514 void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
515   LocationSummary* locations = instruction->GetLocations();
516   VRegister lhs = VRegisterFrom(locations->InAt(0));
517   VRegister rhs = VRegisterFrom(locations->InAt(1));
518   VRegister dst = VRegisterFrom(locations->Out());
519   switch (instruction->GetPackedType()) {
520     case DataType::Type::kUint8:
521       DCHECK_EQ(16u, instruction->GetVectorLength());
522       instruction->IsRounded()
523           ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
524           : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
525       break;
526     case DataType::Type::kInt8:
527       DCHECK_EQ(16u, instruction->GetVectorLength());
528       instruction->IsRounded()
529           ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
530           : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
531       break;
532     case DataType::Type::kUint16:
533       DCHECK_EQ(8u, instruction->GetVectorLength());
534       instruction->IsRounded()
535           ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
536           : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
537       break;
538     case DataType::Type::kInt16:
539       DCHECK_EQ(8u, instruction->GetVectorLength());
540       instruction->IsRounded()
541           ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
542           : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
543       break;
544     default:
545       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
546       UNREACHABLE();
547   }
548 }
549 
void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise subtraction: SUB for integral lanes, FSUB for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
591 
VisitVecSaturationSub(HVecSaturationSub * instruction)592 void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
593   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
594 }
595 
VisitVecSaturationSub(HVecSaturationSub * instruction)596 void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
597   LocationSummary* locations = instruction->GetLocations();
598   VRegister lhs = VRegisterFrom(locations->InAt(0));
599   VRegister rhs = VRegisterFrom(locations->InAt(1));
600   VRegister dst = VRegisterFrom(locations->Out());
601   switch (instruction->GetPackedType()) {
602     case DataType::Type::kUint8:
603       DCHECK_EQ(16u, instruction->GetVectorLength());
604       __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
605       break;
606     case DataType::Type::kInt8:
607       DCHECK_EQ(16u, instruction->GetVectorLength());
608       __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
609       break;
610     case DataType::Type::kUint16:
611       DCHECK_EQ(8u, instruction->GetVectorLength());
612       __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
613       break;
614     case DataType::Type::kInt16:
615       DCHECK_EQ(8u, instruction->GetVectorLength());
616       __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
617       break;
618     default:
619       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
620       UNREACHABLE();
621   }
622 }
623 
void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise multiplication: MUL for integral lanes, FMUL for FP lanes.
// Note there is no kInt64 case: 64-bit lanes fall through to the unsupported
// default.
void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
661 
VisitVecDiv(HVecDiv * instruction)662 void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
663   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
664 }
665 
VisitVecDiv(HVecDiv * instruction)666 void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
667   LocationSummary* locations = instruction->GetLocations();
668   VRegister lhs = VRegisterFrom(locations->InAt(0));
669   VRegister rhs = VRegisterFrom(locations->InAt(1));
670   VRegister dst = VRegisterFrom(locations->Out());
671   switch (instruction->GetPackedType()) {
672     case DataType::Type::kFloat32:
673       DCHECK_EQ(4u, instruction->GetVectorLength());
674       __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
675       break;
676     case DataType::Type::kFloat64:
677       DCHECK_EQ(2u, instruction->GetVectorLength());
678       __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
679       break;
680     default:
681       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
682       UNREACHABLE();
683   }
684 }
685 
void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise minimum: UMIN/SMIN per signedness for integral lanes,
// FMIN for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
733 
void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

// Emits lane-wise maximum: UMAX/SMAX per signedness for integral lanes,
// FMAX for FP lanes. Mirrors VisitVecMin above.
void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
781 
VisitVecAnd(HVecAnd * instruction)782 void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
783   // TODO: Allow constants supported by BIC (vector, immediate).
784   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
785 }
786 
VisitVecAnd(HVecAnd * instruction)787 void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
788   LocationSummary* locations = instruction->GetLocations();
789   VRegister lhs = VRegisterFrom(locations->InAt(0));
790   VRegister rhs = VRegisterFrom(locations->InAt(1));
791   VRegister dst = VRegisterFrom(locations->Out());
792   switch (instruction->GetPackedType()) {
793     case DataType::Type::kBool:
794     case DataType::Type::kUint8:
795     case DataType::Type::kInt8:
796     case DataType::Type::kUint16:
797     case DataType::Type::kInt16:
798     case DataType::Type::kInt32:
799     case DataType::Type::kInt64:
800     case DataType::Type::kFloat32:
801     case DataType::Type::kFloat64:
802       __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
803       break;
804     default:
805       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
806       UNREACHABLE();
807   }
808 }
809 
// HVecAndNot is not implemented for this backend; the vectorizer should never
// produce one here, so reaching this visitor indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
813 
// Code generation counterpart of the unimplemented HVecAndNot; see the
// locations builder above. Aborts if ever reached.
void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
818 
VisitVecOr(HVecOr * instruction)819 void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
820   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
821 }
822 
VisitVecOr(HVecOr * instruction)823 void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
824   LocationSummary* locations = instruction->GetLocations();
825   VRegister lhs = VRegisterFrom(locations->InAt(0));
826   VRegister rhs = VRegisterFrom(locations->InAt(1));
827   VRegister dst = VRegisterFrom(locations->Out());
828   switch (instruction->GetPackedType()) {
829     case DataType::Type::kBool:
830     case DataType::Type::kUint8:
831     case DataType::Type::kInt8:
832     case DataType::Type::kUint16:
833     case DataType::Type::kInt16:
834     case DataType::Type::kInt32:
835     case DataType::Type::kInt64:
836     case DataType::Type::kFloat32:
837     case DataType::Type::kFloat64:
838       __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
839       break;
840     default:
841       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
842       UNREACHABLE();
843   }
844 }
845 
VisitVecXor(HVecXor * instruction)846 void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
847   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
848 }
849 
VisitVecXor(HVecXor * instruction)850 void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
851   LocationSummary* locations = instruction->GetLocations();
852   VRegister lhs = VRegisterFrom(locations->InAt(0));
853   VRegister rhs = VRegisterFrom(locations->InAt(1));
854   VRegister dst = VRegisterFrom(locations->Out());
855   switch (instruction->GetPackedType()) {
856     case DataType::Type::kBool:
857     case DataType::Type::kUint8:
858     case DataType::Type::kInt8:
859     case DataType::Type::kUint16:
860     case DataType::Type::kInt16:
861     case DataType::Type::kInt32:
862     case DataType::Type::kInt64:
863     case DataType::Type::kFloat32:
864     case DataType::Type::kFloat64:
865       __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
866       break;
867     default:
868       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
869       UNREACHABLE();
870   }
871 }
872 
873 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)874 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
875   LocationSummary* locations = new (allocator) LocationSummary(instruction);
876   switch (instruction->GetPackedType()) {
877     case DataType::Type::kUint8:
878     case DataType::Type::kInt8:
879     case DataType::Type::kUint16:
880     case DataType::Type::kInt16:
881     case DataType::Type::kInt32:
882     case DataType::Type::kInt64:
883       locations->SetInAt(0, Location::RequiresFpuRegister());
884       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
885       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
886       break;
887     default:
888       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
889       UNREACHABLE();
890   }
891 }
892 
VisitVecShl(HVecShl * instruction)893 void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
894   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
895 }
896 
VisitVecShl(HVecShl * instruction)897 void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
898   LocationSummary* locations = instruction->GetLocations();
899   VRegister lhs = VRegisterFrom(locations->InAt(0));
900   VRegister dst = VRegisterFrom(locations->Out());
901   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
902   switch (instruction->GetPackedType()) {
903     case DataType::Type::kUint8:
904     case DataType::Type::kInt8:
905       DCHECK_EQ(16u, instruction->GetVectorLength());
906       __ Shl(dst.V16B(), lhs.V16B(), value);
907       break;
908     case DataType::Type::kUint16:
909     case DataType::Type::kInt16:
910       DCHECK_EQ(8u, instruction->GetVectorLength());
911       __ Shl(dst.V8H(), lhs.V8H(), value);
912       break;
913     case DataType::Type::kInt32:
914       DCHECK_EQ(4u, instruction->GetVectorLength());
915       __ Shl(dst.V4S(), lhs.V4S(), value);
916       break;
917     case DataType::Type::kInt64:
918       DCHECK_EQ(2u, instruction->GetVectorLength());
919       __ Shl(dst.V2D(), lhs.V2D(), value);
920       break;
921     default:
922       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
923       UNREACHABLE();
924   }
925 }
926 
VisitVecShr(HVecShr * instruction)927 void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
928   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
929 }
930 
VisitVecShr(HVecShr * instruction)931 void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
932   LocationSummary* locations = instruction->GetLocations();
933   VRegister lhs = VRegisterFrom(locations->InAt(0));
934   VRegister dst = VRegisterFrom(locations->Out());
935   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
936   switch (instruction->GetPackedType()) {
937     case DataType::Type::kUint8:
938     case DataType::Type::kInt8:
939       DCHECK_EQ(16u, instruction->GetVectorLength());
940       __ Sshr(dst.V16B(), lhs.V16B(), value);
941       break;
942     case DataType::Type::kUint16:
943     case DataType::Type::kInt16:
944       DCHECK_EQ(8u, instruction->GetVectorLength());
945       __ Sshr(dst.V8H(), lhs.V8H(), value);
946       break;
947     case DataType::Type::kInt32:
948       DCHECK_EQ(4u, instruction->GetVectorLength());
949       __ Sshr(dst.V4S(), lhs.V4S(), value);
950       break;
951     case DataType::Type::kInt64:
952       DCHECK_EQ(2u, instruction->GetVectorLength());
953       __ Sshr(dst.V2D(), lhs.V2D(), value);
954       break;
955     default:
956       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
957       UNREACHABLE();
958   }
959 }
960 
VisitVecUShr(HVecUShr * instruction)961 void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
962   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
963 }
964 
VisitVecUShr(HVecUShr * instruction)965 void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
966   LocationSummary* locations = instruction->GetLocations();
967   VRegister lhs = VRegisterFrom(locations->InAt(0));
968   VRegister dst = VRegisterFrom(locations->Out());
969   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
970   switch (instruction->GetPackedType()) {
971     case DataType::Type::kUint8:
972     case DataType::Type::kInt8:
973       DCHECK_EQ(16u, instruction->GetVectorLength());
974       __ Ushr(dst.V16B(), lhs.V16B(), value);
975       break;
976     case DataType::Type::kUint16:
977     case DataType::Type::kInt16:
978       DCHECK_EQ(8u, instruction->GetVectorLength());
979       __ Ushr(dst.V8H(), lhs.V8H(), value);
980       break;
981     case DataType::Type::kInt32:
982       DCHECK_EQ(4u, instruction->GetVectorLength());
983       __ Ushr(dst.V4S(), lhs.V4S(), value);
984       break;
985     case DataType::Type::kInt64:
986       DCHECK_EQ(2u, instruction->GetVectorLength());
987       __ Ushr(dst.V2D(), lhs.V2D(), value);
988       break;
989     default:
990       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
991       UNREACHABLE();
992   }
993 }
994 
VisitVecSetScalars(HVecSetScalars * instruction)995 void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
996   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
997 
998   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
999 
1000   HInstruction* input = instruction->InputAt(0);
1001   bool is_zero = IsZeroBitPattern(input);
1002 
1003   switch (instruction->GetPackedType()) {
1004     case DataType::Type::kBool:
1005     case DataType::Type::kUint8:
1006     case DataType::Type::kInt8:
1007     case DataType::Type::kUint16:
1008     case DataType::Type::kInt16:
1009     case DataType::Type::kInt32:
1010     case DataType::Type::kInt64:
1011       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1012                                     : Location::RequiresRegister());
1013       locations->SetOut(Location::RequiresFpuRegister());
1014       break;
1015     case DataType::Type::kFloat32:
1016     case DataType::Type::kFloat64:
1017       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1018                                     : Location::RequiresFpuRegister());
1019       locations->SetOut(Location::RequiresFpuRegister());
1020       break;
1021     default:
1022       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1023       UNREACHABLE();
1024   }
1025 }
1026 
VisitVecSetScalars(HVecSetScalars * instruction)1027 void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
1028   LocationSummary* locations = instruction->GetLocations();
1029   VRegister dst = VRegisterFrom(locations->Out());
1030 
1031   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1032 
1033   // Zero out all other elements first.
1034   __ Movi(dst.V16B(), 0);
1035 
1036   // Shorthand for any type of zero.
1037   if (IsZeroBitPattern(instruction->InputAt(0))) {
1038     return;
1039   }
1040 
1041   // Set required elements.
1042   switch (instruction->GetPackedType()) {
1043     case DataType::Type::kBool:
1044     case DataType::Type::kUint8:
1045     case DataType::Type::kInt8:
1046       DCHECK_EQ(16u, instruction->GetVectorLength());
1047       __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
1048       break;
1049     case DataType::Type::kUint16:
1050     case DataType::Type::kInt16:
1051       DCHECK_EQ(8u, instruction->GetVectorLength());
1052       __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
1053       break;
1054     case DataType::Type::kInt32:
1055       DCHECK_EQ(4u, instruction->GetVectorLength());
1056       __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
1057       break;
1058     case DataType::Type::kInt64:
1059       DCHECK_EQ(2u, instruction->GetVectorLength());
1060       __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
1061       break;
1062     default:
1063       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1064       UNREACHABLE();
1065   }
1066 }
1067 
1068 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1069 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1070   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1071   switch (instruction->GetPackedType()) {
1072     case DataType::Type::kUint8:
1073     case DataType::Type::kInt8:
1074     case DataType::Type::kUint16:
1075     case DataType::Type::kInt16:
1076     case DataType::Type::kInt32:
1077     case DataType::Type::kInt64:
1078       locations->SetInAt(0, Location::RequiresFpuRegister());
1079       locations->SetInAt(1, Location::RequiresFpuRegister());
1080       locations->SetInAt(2, Location::RequiresFpuRegister());
1081       locations->SetOut(Location::SameAsFirstInput());
1082       break;
1083     default:
1084       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1085       UNREACHABLE();
1086   }
1087 }
1088 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1089 void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1090   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1091 }
1092 
1093 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1094 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1095 // However vector MultiplyAccumulate instruction is not affected.
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1096 void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1097   LocationSummary* locations = instruction->GetLocations();
1098   VRegister acc = VRegisterFrom(locations->InAt(0));
1099   VRegister left = VRegisterFrom(locations->InAt(1));
1100   VRegister right = VRegisterFrom(locations->InAt(2));
1101 
1102   DCHECK(locations->InAt(0).Equals(locations->Out()));
1103 
1104   switch (instruction->GetPackedType()) {
1105     case DataType::Type::kUint8:
1106     case DataType::Type::kInt8:
1107       DCHECK_EQ(16u, instruction->GetVectorLength());
1108       if (instruction->GetOpKind() == HInstruction::kAdd) {
1109         __ Mla(acc.V16B(), left.V16B(), right.V16B());
1110       } else {
1111         __ Mls(acc.V16B(), left.V16B(), right.V16B());
1112       }
1113       break;
1114     case DataType::Type::kUint16:
1115     case DataType::Type::kInt16:
1116       DCHECK_EQ(8u, instruction->GetVectorLength());
1117       if (instruction->GetOpKind() == HInstruction::kAdd) {
1118         __ Mla(acc.V8H(), left.V8H(), right.V8H());
1119       } else {
1120         __ Mls(acc.V8H(), left.V8H(), right.V8H());
1121       }
1122       break;
1123     case DataType::Type::kInt32:
1124       DCHECK_EQ(4u, instruction->GetVectorLength());
1125       if (instruction->GetOpKind() == HInstruction::kAdd) {
1126         __ Mla(acc.V4S(), left.V4S(), right.V4S());
1127       } else {
1128         __ Mls(acc.V4S(), left.V4S(), right.V4S());
1129       }
1130       break;
1131     default:
1132       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1133       UNREACHABLE();
1134   }
1135 }
1136 
void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  // The number of FPU temps depends on how far the source lanes must be
  // widened to reach the accumulator's lane width (see the codegen visitor).
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          // Byte -> long needs four temps in total; two extra on top of the
          // two shared with the byte -> int case below.
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          // Byte -> int needs two temps for the sign-extended halves.
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          // Byte -> short uses SABAL/SABAL2 directly; no temps needed.
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        // Half -> long needs two temps for the sign-extended halves.
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      if (instruction->GetPackedType() == a->GetPackedType()) {
        // Same-width SAD is computed via SUB/ABS/ADD and needs one scratch.
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}
1178 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1179 void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1180   LocationSummary* locations = instruction->GetLocations();
1181   VRegister acc = VRegisterFrom(locations->InAt(0));
1182   VRegister left = VRegisterFrom(locations->InAt(1));
1183   VRegister right = VRegisterFrom(locations->InAt(2));
1184 
1185   DCHECK(locations->InAt(0).Equals(locations->Out()));
1186 
1187   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
1188   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1189   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1190   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1191             HVecOperation::ToSignedType(b->GetPackedType()));
1192   switch (a->GetPackedType()) {
1193     case DataType::Type::kUint8:
1194     case DataType::Type::kInt8:
1195       DCHECK_EQ(16u, a->GetVectorLength());
1196       switch (instruction->GetPackedType()) {
1197         case DataType::Type::kInt16:
1198           DCHECK_EQ(8u, instruction->GetVectorLength());
1199           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
1200           __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
1201           break;
1202         case DataType::Type::kInt32: {
1203           DCHECK_EQ(4u, instruction->GetVectorLength());
1204           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1205           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1206           __ Sxtl(tmp1.V8H(), left.V8B());
1207           __ Sxtl(tmp2.V8H(), right.V8B());
1208           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1209           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1210           __ Sxtl2(tmp1.V8H(), left.V16B());
1211           __ Sxtl2(tmp2.V8H(), right.V16B());
1212           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1213           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1214           break;
1215         }
1216         case DataType::Type::kInt64: {
1217           DCHECK_EQ(2u, instruction->GetVectorLength());
1218           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1219           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1220           VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
1221           VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
1222           __ Sxtl(tmp1.V8H(), left.V8B());
1223           __ Sxtl(tmp2.V8H(), right.V8B());
1224           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1225           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1226           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1227           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1228           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1229           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1230           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1231           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1232           __ Sxtl2(tmp1.V8H(), left.V16B());
1233           __ Sxtl2(tmp2.V8H(), right.V16B());
1234           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1235           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1236           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1237           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1238           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1239           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1240           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1241           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1242           break;
1243         }
1244         default:
1245           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1246           UNREACHABLE();
1247       }
1248       break;
1249     case DataType::Type::kUint16:
1250     case DataType::Type::kInt16:
1251       DCHECK_EQ(8u, a->GetVectorLength());
1252       switch (instruction->GetPackedType()) {
1253         case DataType::Type::kInt32:
1254           DCHECK_EQ(4u, instruction->GetVectorLength());
1255           __ Sabal(acc.V4S(), left.V4H(), right.V4H());
1256           __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
1257           break;
1258         case DataType::Type::kInt64: {
1259           DCHECK_EQ(2u, instruction->GetVectorLength());
1260           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1261           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1262           __ Sxtl(tmp1.V4S(), left.V4H());
1263           __ Sxtl(tmp2.V4S(), right.V4H());
1264           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1265           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1266           __ Sxtl2(tmp1.V4S(), left.V8H());
1267           __ Sxtl2(tmp2.V4S(), right.V8H());
1268           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1269           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1270           break;
1271         }
1272         default:
1273           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1274           UNREACHABLE();
1275       }
1276       break;
1277     case DataType::Type::kInt32:
1278       DCHECK_EQ(4u, a->GetVectorLength());
1279       switch (instruction->GetPackedType()) {
1280         case DataType::Type::kInt32: {
1281           DCHECK_EQ(4u, instruction->GetVectorLength());
1282           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1283           __ Sub(tmp.V4S(), left.V4S(), right.V4S());
1284           __ Abs(tmp.V4S(), tmp.V4S());
1285           __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
1286           break;
1287         }
1288         case DataType::Type::kInt64:
1289           DCHECK_EQ(2u, instruction->GetVectorLength());
1290           __ Sabal(acc.V2D(), left.V2S(), right.V2S());
1291           __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
1292           break;
1293         default:
1294           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1295           UNREACHABLE();
1296       }
1297       break;
1298     case DataType::Type::kInt64:
1299       DCHECK_EQ(2u, a->GetVectorLength());
1300       switch (instruction->GetPackedType()) {
1301         case DataType::Type::kInt64: {
1302           DCHECK_EQ(2u, instruction->GetVectorLength());
1303           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1304           __ Sub(tmp.V2D(), left.V2D(), right.V2D());
1305           __ Abs(tmp.V2D(), tmp.V2D());
1306           __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
1307           break;
1308         }
1309         default:
1310           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1311           UNREACHABLE();
1312       }
1313       break;
1314     default:
1315       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1316   }
1317 }
1318 
VisitVecDotProd(HVecDotProd * instruction)1319 void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
1320   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1321   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1322   locations->SetInAt(0, Location::RequiresFpuRegister());
1323   locations->SetInAt(1, Location::RequiresFpuRegister());
1324   locations->SetInAt(2, Location::RequiresFpuRegister());
1325   locations->SetOut(Location::SameAsFirstInput());
1326 
1327   // For Int8 and Uint8 general case we need a temp register.
1328   if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1329       !ShouldEmitDotProductInstructions(codegen_)) {
1330     locations->AddTemp(Location::RequiresFpuRegister());
1331   }
1332 }
1333 
// Emits acc += dot(left, right) for an Int32 accumulator. Byte inputs use the
// native UDOT/SDOT when available, otherwise a widening multiply plus
// pairwise-add sequence; half-word inputs use UMLAL/SMLAL directly.
void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  // Dispatch on the byte width of the input lanes, not the packed type, since
  // signed and unsigned variants share each width bucket.
  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Fallback: widen-multiply each byte half, then add the 8 short
          // products into the int accumulator via UADDW/UADDW2.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed fallback, mirroring the unsigned sequence above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      // Half-word inputs: widening multiply-accumulate does it in two steps.
      DCHECK_EQ(8u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1394 
1395 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1396 static void CreateVecMemLocations(ArenaAllocator* allocator,
1397                                   HVecMemoryOperation* instruction,
1398                                   bool is_load) {
1399   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1400   switch (instruction->GetPackedType()) {
1401     case DataType::Type::kBool:
1402     case DataType::Type::kUint8:
1403     case DataType::Type::kInt8:
1404     case DataType::Type::kUint16:
1405     case DataType::Type::kInt16:
1406     case DataType::Type::kInt32:
1407     case DataType::Type::kInt64:
1408     case DataType::Type::kFloat32:
1409     case DataType::Type::kFloat64:
1410       locations->SetInAt(0, Location::RequiresRegister());
1411       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1412       if (is_load) {
1413         locations->SetOut(Location::RequiresFpuRegister());
1414       } else {
1415         locations->SetInAt(2, Location::RequiresFpuRegister());
1416       }
1417       break;
1418     default:
1419       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1420       UNREACHABLE();
1421   }
1422 }
1423 
VisitVecLoad(HVecLoad * instruction)1424 void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
1425   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1426 }
1427 
// Emits NEON code for a vector load, including the special dual-path load of
// 8 chars from a potentially compressed java.lang.String.
void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  // May be acquired by VecNEONAddress() if forming the address needs a
  // temporary core register; released below when valid.
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        // The LSB of the count field is the compression flag (1 = uncompressed).
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Element size 1 (not `size`): compressed chars are single bytes, so
        // only the low 64 bits (V8B) are loaded, then widened to 8 halfwords.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNEONAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      // Plain full-vector load for all other packed types.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNEONAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1483 
// Register allocation for a vector store: delegates to the shared helper with
// is_load cleared, so the stored value is a third FP/SIMD input.
void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false);
}
1487 
// Emits NEON code for a vector store: a single full-register STR of the value
// held in input 2. Unlike loads, stores have no string-compression path.
void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  // May be acquired by VecNEONAddress() if forming the address needs a
  // temporary core register.
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg,
             VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1515 
VisitVecPredSetAll(HVecPredSetAll * instruction)1516 void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
1517   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1518   DCHECK(instruction->InputAt(0)->IsIntConstant());
1519   locations->SetInAt(0, Location::NoLocation());
1520   locations->SetOut(Location::NoLocation());
1521 }
1522 
// Intentionally empty: the locations builder assigns no locations to
// HVecPredSetAll (and asserts a constant input), so no code is emitted here.
void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll*) {
}
1525 
// HVecPredWhile is not supported by the NEON codegen; reaching this visitor
// indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1530 
// HVecPredWhile is not supported by the NEON codegen; reaching this visitor
// indicates a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1535 
// HVecPredCondition is not supported by the NEON codegen; reaching this
// visitor indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1540 
// HVecPredCondition is not supported by the NEON codegen; reaching this
// visitor indicates a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1545 
// Acquires a 128-bit (Q) FP/SIMD register from VIXL's scratch pool and wraps
// it in a Location. Released via FreeSIMDScratchLocation().
Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  // NEON SIMD registers are always Q-sized (16 bytes) here.
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
}
1551 
// Returns a scratch Q register previously obtained from
// AllocateSIMDScratchLocation() to VIXL's scratch pool.
void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  // NEON SIMD registers are always Q-sized (16 bytes) here.
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  scope->Release(QRegisterFrom(loc));
}
1557 
// Loads a 128-bit SIMD value from a stack slot into a Q register.
void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
}
1563 
// Copies a 128-bit SIMD value between two Q registers.
void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
}
1569 
// Copies a 128-bit SIMD value into a SIMD stack slot, either from an FP
// register or from another SIMD stack slot.
void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
                                                            Location source) {
  DCHECK(destination.IsSIMDStackSlot());
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);

  if (source.IsFpuRegister()) {
    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // No scratch FP/SIMD register available: move the 16 bytes through a
      // core (X) register in two 8-byte pieces.
      Register temp = temps.AcquireX();
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
    } else {
      // Stack-to-stack copy through a scratch Q register.
      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}
1593 
// Saves (is_save == true) or restores (is_save == false) the live core and FP
// registers recorded in `locations`, at `spill_offset` from the stack pointer.
// Core registers are laid out first, FP registers immediately after.
template <bool is_save>
void SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64* codegen,
                                            LocationSummary* locations,
                                            int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  // FP spill width depends on whether SIMD registers must be preserved.
  const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
  DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);

  MacroAssembler* masm = codegen->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  // Largest offset a load/store pair in the emitted sequence will address
  // (the last pair starts 2 * reg_size before the end of the spill area).
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address(float point registers spill base address).
    Register new_base = temps.AcquireSameSizeAs(base);
    masm->Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    // The FP list now starts at the new base; the core list sits just below it,
    // hence the negative offset.
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}
1641 
// Spills the live registers recorded in `locations` to the stack at
// `spill_offset` (slow-path entry).
void InstructionCodeGeneratorARM64Neon::SaveLiveRegistersHelper(LocationSummary* locations,
                                                                int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}
1646 
// Reloads the live registers recorded in `locations` from the stack at
// `spill_offset` (slow-path exit); mirror of SaveLiveRegistersHelper.
void InstructionCodeGeneratorARM64Neon::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                   int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}
1651 
1652 #undef __
1653 
1654 }  // namespace arm64
1655 }  // namespace art
1656