/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art HIDDEN {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ZRegisterFrom;

#define __ GetVIXLAssembler()->

// Returns whether the value of the constant can be directly encoded into the instruction as
// immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
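    // The integer DUP (immediate) form encodes a signed 8-bit value with an optional
    // left shift by 8; only the unshifted imm8 range is used here.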
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}

// Returns
//  - constant location - if 'constant' is an actual constant and its value can be
//    encoded into the instruction.
//  - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant() &&
      SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant);
  }

  return Location::RequiresRegister();
}

void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
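  // All SVE vector instructions in the graph are expected to operate on full-width
  // vectors: element size times element count must equal the SIMD register width in
  // bytes, with partial loop iterations handled through the governing predicate.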
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
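  // Integer constants use the DUP (immediate) form validated above, general-purpose
  // inputs use DUP (scalar), FP constants use FDUP (immediate), and FP register inputs
  // are broadcast from lane 0 with DUP (indexed).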
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = LoopPReg();
  ValidateVectorLength(instruction);
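  // SADDV/UADDV reduce the active source lanes to a single scalar sum in the low part
  // of the destination V register.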
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
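      // Booleans are stored as 0/1, so boolean-not is computed here as (x XOR 1) rather
      // than as a full bitwise NOT.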
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  const VRegister dst = VRegisterFrom(locations->Out());
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }
  ValidateVectorLength(instruction);

  // Set required elements.
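  // The low 128 bits of a Z register alias the same-numbered NEON V register, so lane 0
  // can be written through the NEON MOV (element) form below.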
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector multiply-accumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);
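  // MLA computes acc + left * right and MLS computes acc - left * right; the result is
  // written back into the accumulator, which is why input 0 shares the output location.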

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));
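      // SDOT/UDOT have no predicated form, so the inactive lanes of both operands are
      // zeroed out first; zero lanes then contribute nothing to the accumulated sums.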

      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);
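  // LD1B/LD1H/LD1W/LD1D are predicated contiguous loads: active lanes are read from
  // memory while inactive lanes of the destination are zeroed (zeroing predicate form).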

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);
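  // ST1B/ST1H/ST1W/ST1D are predicated contiguous stores: only active lanes are written
  // to memory.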

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  const PRegister p_reg = LoopPReg();

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and predicate inputs locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
  // to the RA.
  //
  // To make the RA happy Location::NoLocation() was used for all the vector instructions
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in register allocator.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  // Current implementation of predicated loop execution only supports kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);
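  // WHILELO makes predicate lane i active while (left + i) < right (unsigned) and also
  // sets the condition flags. The lane granularity is chosen so that the number of
  // predicate lanes equals the loop's vector length.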

  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(LoopPReg().VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(LoopPReg().VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(LoopPReg().VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(LoopPReg().VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  // Currently VecPredCondition is only used as part of vectorized loop check condition
  // evaluation.
  DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
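  // The flags here were set by the preceding WHILELO; N is the "first" flag, i.e. whether
  // the first predicate lane is active. CSET with PL (N clear) therefore produces 1
  // exactly when no lanes remain active and the vectorized loop should exit.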
  __ Cset(reg, pl);
}

Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in the ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
      // when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;
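  // When the graph contains SIMD code the FP spills must cover the full scalable Z
  // registers; otherwise the 64-bit D-register views below are sufficient.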

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to restore D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art