1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "macro-assembler-aarch64.h"
28 
29 namespace vixl {
30 namespace aarch64 {
31 
32 void MacroAssembler::AddSubHelper(AddSubHelperOption option,
33                                   const ZRegister& zd,
34                                   const ZRegister& zn,
35                                   IntegerOperand imm) {
36   VIXL_ASSERT(imm.FitsInLane(zd));
37 
38   // Simple, encodable cases.
39   if (TrySingleAddSub(option, zd, zn, imm)) return;
40 
41   VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
42   bool add_imm = (option == kAddImmediate);
43 
44   // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
45   // instruction. Also interpret the immediate as signed, so we can convert
46   // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
47   IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
48   if (signed_imm.IsNegative()) {
49     AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
50     IntegerOperand n_imm(signed_imm.GetMagnitude());
51     // IntegerOperand can represent -INT_MIN, so this is always safe.
52     VIXL_ASSERT(n_imm.IsPositiveOrZero());
53     if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
54   }
55 
56   // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
57   UseScratchRegisterScope temps(this);
58   ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
59   Dup(scratch, imm);
60 
61   SingleEmissionCheckScope guard(this);
62   if (add_imm) {
63     add(zd, zn, scratch);
64   } else {
65     sub(zd, zn, scratch);
66   }
67 }
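// Editorial sketch (not part of the original source): for an immediate that no
// single `add`/`sub` form can encode, e.g.
//   Add(z0.VnS(), z1.VnS(), 0x12345678);
// the helper above falls back to roughly:
//   Dup(zTmp.VnS(), 0x12345678);   // scratch Z register (name hypothetical);
//                                  // Dup may itself need a scratch X register
//   add z0.s, z1.s, zTmp.s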
68 
69 bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
70                                      const ZRegister& zd,
71                                      const ZRegister& zn,
72                                      IntegerOperand imm) {
73   VIXL_ASSERT(imm.FitsInLane(zd));
74 
75   int imm8;
76   int shift = -1;
77   if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
78       imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
79     MovprfxHelperScope guard(this, zd, zn);
80     switch (option) {
81       case kAddImmediate:
82         add(zd, zd, imm8, shift);
83         return true;
84       case kSubImmediate:
85         sub(zd, zd, imm8, shift);
86         return true;
87     }
88   }
89   return false;
90 }
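// Editorial note on the encodings tried above (illustrative values):
//   imm = 0x3f               -> imm8 = 0x3f, shift = 0  (<8, 0> form)
//   imm = 0x3f00 (.H or wider) -> imm8 = 0x3f, shift = 8  (<8, 8> form)
// Any other immediate (for the given lane size) makes this function return false.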
91 
92 void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
93                                       SVEArithPredicatedFn reg_macro,
94                                       const ZRegister& zd,
95                                       const ZRegister& zn,
96                                       IntegerOperand imm,
97                                       bool is_signed) {
98   if (is_signed) {
99     // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
100     if (imm.IsInt8()) {
101       MovprfxHelperScope guard(this, zd, zn);
102       (this->*imm_fn)(zd, zd, imm.AsInt8());
103       return;
104     }
105   } else {
106     // E.g. UMIN_z_zi, UMAX_z_zi
107     if (imm.IsUint8()) {
108       MovprfxHelperScope guard(this, zd, zn);
109       (this->*imm_fn)(zd, zd, imm.AsUint8());
110       return;
111     }
112   }
113 
114   UseScratchRegisterScope temps(this);
115   PRegister pg = temps.AcquireGoverningP();
116   Ptrue(pg.WithSameLaneSizeAs(zd));
117 
118   // Try to re-use zd if we can, so we can avoid a movprfx.
119   ZRegister scratch =
120       zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
121                      : zd;
122   Dup(scratch, imm);
123 
124   // The vector-form macro for commutative operations will swap the arguments to
125   // avoid movprfx, if necessary.
126   (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
127 }
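// Editorial examples of how the helper above is used (not original comments):
//   Smin(z0.VnS(), z1.VnS(), 100);   // 100 fits in int8: movprfx (if needed) + smin #100
//   Umin(z0.VnS(), z1.VnS(), 300);   // 300 does not fit in uint8: falls back to
//                                    // Ptrue + Dup(300) + the predicated vector Umin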
128 
129 void MacroAssembler::Mul(const ZRegister& zd,
130                          const ZRegister& zn,
131                          IntegerOperand imm) {
132   VIXL_ASSERT(allow_macro_instructions_);
133   IntArithImmFn imm_fn = &Assembler::mul;
134   SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
135   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
136 }
137 
138 void MacroAssembler::Smin(const ZRegister& zd,
139                           const ZRegister& zn,
140                           IntegerOperand imm) {
141   VIXL_ASSERT(allow_macro_instructions_);
142   VIXL_ASSERT(imm.FitsInSignedLane(zd));
143   IntArithImmFn imm_fn = &Assembler::smin;
144   SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
145   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
146 }
147 
148 void MacroAssembler::Smax(const ZRegister& zd,
149                           const ZRegister& zn,
150                           IntegerOperand imm) {
151   VIXL_ASSERT(allow_macro_instructions_);
152   VIXL_ASSERT(imm.FitsInSignedLane(zd));
153   IntArithImmFn imm_fn = &Assembler::smax;
154   SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
155   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
156 }
157 
158 void MacroAssembler::Umax(const ZRegister& zd,
159                           const ZRegister& zn,
160                           IntegerOperand imm) {
161   VIXL_ASSERT(allow_macro_instructions_);
162   VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
163   IntArithImmFn imm_fn = &Assembler::umax;
164   SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
165   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
166 }
167 
168 void MacroAssembler::Umin(const ZRegister& zd,
169                           const ZRegister& zn,
170                           IntegerOperand imm) {
171   VIXL_ASSERT(allow_macro_instructions_);
172   VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
173   IntArithImmFn imm_fn = &Assembler::umin;
174   SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
175   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
176 }
177 
178 void MacroAssembler::Addpl(const Register& xd,
179                            const Register& xn,
180                            int64_t multiplier) {
181   VIXL_ASSERT(allow_macro_instructions_);
182 
183   // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
184   // `VL * multiplier` cannot overflow, for any possible value of VL.
185   VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
186   VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
187 
188   if (xd.IsZero()) return;
189   if (xn.IsZero() && xd.IsSP()) {
190     // TODO: This operation doesn't make much sense, but we could support it
191     // with a scratch register if necessary.
192     VIXL_UNIMPLEMENTED();
193   }
194 
195   // Handling xzr requires an extra move, so defer it until later so we can try
196   // to use `rdvl` instead (via `Addvl`).
197   if (IsInt6(multiplier) && !xn.IsZero()) {
198     SingleEmissionCheckScope guard(this);
199     addpl(xd, xn, static_cast<int>(multiplier));
200     return;
201   }
202 
203   // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
204   if ((multiplier % kZRegBitsPerPRegBit) == 0) {
205     Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
206     return;
207   }
208 
209   if (IsInt6(multiplier)) {
210     VIXL_ASSERT(xn.IsZero());  // Other cases were handled with `addpl`.
211     // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
212     // materialise a zero.
213     MacroEmissionCheckScope guard(this);
214     movz(xd, 0);
215     addpl(xd, xd, static_cast<int>(multiplier));
216     return;
217   }
218 
219   // TODO: Some probable cases result in rather long sequences. For example,
220   // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
221   // outside the encodable range. We should look for ways to cover such cases
222   // without drastically increasing the complexity of this logic.
223 
224   // For other cases, calculate xn + (PL * multiplier) using discrete
225   // instructions. This requires two scratch registers in the general case, so
226   // try to re-use the destination as a scratch register.
227   UseScratchRegisterScope temps(this);
228   temps.Include(xd);
229   temps.Exclude(xn);
230 
231   Register scratch = temps.AcquireX();
232 // There is no `rdpl`, so we have to calculate PL from VL. We can't
233   // scale the multiplier because (we already know) it isn't a multiple of 8.
234   Rdvl(scratch, multiplier);
235 
236   MacroEmissionCheckScope guard(this);
237   if (xn.IsZero()) {
238     asr(xd, scratch, kZRegBitsPerPRegBitLog2);
239   } else if (xd.IsSP() || xn.IsSP()) {
240     // TODO: MacroAssembler::Add should be able to handle this.
241     asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
242     add(xd, xn, scratch);
243   } else {
244     add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
245   }
246 }
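// Editorial sketch of some lowerings chosen by the macro above:
//   Addpl(x0, x1, 4);    // int6 immediate:              addpl x0, x1, #4
//   Addpl(x0, x1, 40);   // multiple of 8, not int6:     Addvl(x0, x1, 5) -> addvl x0, x1, #5
//   Addpl(x0, x1, 42);   // general case: Rdvl into a scratch register, then
//                        // add x0, x1, <scratch>, ASR #3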
247 
248 void MacroAssembler::Addvl(const Register& xd,
249                            const Register& xn,
250                            int64_t multiplier) {
251   VIXL_ASSERT(allow_macro_instructions_);
252   VIXL_ASSERT(xd.IsX());
253   VIXL_ASSERT(xn.IsX());
254 
255   // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
256   VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
257   VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
258 
259   if (xd.IsZero()) return;
260   if (xn.IsZero() && xd.IsSP()) {
261     // TODO: This operation doesn't make much sense, but we could support it
262     // with a scratch register if necessary. `rdvl` cannot write into `sp`.
263     VIXL_UNIMPLEMENTED();
264   }
265 
266   if (IsInt6(multiplier)) {
267     SingleEmissionCheckScope guard(this);
268     if (xn.IsZero()) {
269       rdvl(xd, static_cast<int>(multiplier));
270     } else {
271       addvl(xd, xn, static_cast<int>(multiplier));
272     }
273     return;
274   }
275 
276   // TODO: Some probable cases result in rather long sequences. For example,
277   // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
278   // outside the encodable range. We should look for ways to cover such cases
279   // without drastically increasing the complexity of this logic.
280 
281   // For other cases, calculate xn + (VL * multiplier) using discrete
282   // instructions. This requires two scratch registers in the general case, so
283   // we try to re-use the destination as a scratch register.
284   UseScratchRegisterScope temps(this);
285   temps.Include(xd);
286   temps.Exclude(xn);
287 
288   Register a = temps.AcquireX();
289   Mov(a, multiplier);
290 
291   MacroEmissionCheckScope guard(this);
292   Register b = temps.AcquireX();
293   rdvl(b, 1);
294   if (xn.IsZero()) {
295     mul(xd, a, b);
296   } else if (xd.IsSP() || xn.IsSP()) {
297     mul(a, a, b);
298     add(xd, xn, a);
299   } else {
300     madd(xd, a, b, xn);
301   }
302 }
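// Editorial sketch of the fallback path above, for a multiplier outside the
// int6 range (scratch register names hypothetical):
//   Addvl(x0, x1, 100);
//   // -> mov  x16, #100
//   //    rdvl x17, #1
//   //    madd x0, x16, x17, x1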
303 
304 void MacroAssembler::CalculateSVEAddress(const Register& xd,
305                                          const SVEMemOperand& addr,
306                                          int vl_divisor_log2) {
307   VIXL_ASSERT(allow_macro_instructions_);
308   VIXL_ASSERT(!addr.IsScatterGather());
309   VIXL_ASSERT(xd.IsX());
310 
311   // The lower bound is where a whole Z register is accessed.
312   VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
313   // The upper bound is for P register accesses, and for instructions like
314   // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
315   VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));
316 
317   SVEOffsetModifier mod = addr.GetOffsetModifier();
318   Register base = addr.GetScalarBase();
319 
320   if (addr.IsEquivalentToScalar()) {
321     // For example:
322     //   [x0]
323     //   [x0, #0]
324     //   [x0, xzr, LSL 2]
325     Mov(xd, base);
326   } else if (addr.IsScalarPlusImmediate()) {
327     // For example:
328     //   [x0, #42]
329     //   [x0, #42, MUL VL]
330     int64_t offset = addr.GetImmediateOffset();
331     VIXL_ASSERT(offset != 0);  // Handled by IsEquivalentToScalar.
332     if (addr.IsMulVl()) {
333       int vl_divisor = 1 << vl_divisor_log2;
334       // For all possible values of vl_divisor, we can simply use `Addpl`. This
335       // will select `addvl` if necessary.
336       VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
337       Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
338     } else {
339       // IsScalarPlusImmediate() ensures that no other modifiers can occur.
340       VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
341       Add(xd, base, offset);
342     }
343   } else if (addr.IsScalarPlusScalar()) {
344     // For example:
345     //   [x0, x1]
346     //   [x0, x1, LSL #4]
347     Register offset = addr.GetScalarOffset();
348     VIXL_ASSERT(!offset.IsZero());  // Handled by IsEquivalentToScalar.
349     if (mod == SVE_LSL) {
350       Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
351     } else {
352       // IsScalarPlusScalar() ensures that no other modifiers can occur.
353       VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
354       Add(xd, base, offset);
355     }
356   } else {
357     // All other forms are scatter-gather addresses, which cannot be evaluated
358     // into an X register.
359     VIXL_UNREACHABLE();
360   }
361 }
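// Editorial worked example for the scalar-plus-immediate MUL VL case above:
// for a whole-Z-register access (vl_divisor_log2 == 0), the address
// [x0, #3, MUL VL] becomes
//   Addpl(xd, x0, 3 * 8)   ->  addpl xd, x0, #24
// since one VL is eight PLs (kZRegBitsPerPRegBit == 8).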
362 
363 void MacroAssembler::Cpy(const ZRegister& zd,
364                          const PRegister& pg,
365                          IntegerOperand imm) {
366   VIXL_ASSERT(allow_macro_instructions_);
367   VIXL_ASSERT(imm.FitsInLane(zd));
368   int imm8;
369   int shift;
370   if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
371       imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
372     SingleEmissionCheckScope guard(this);
373     cpy(zd, pg, imm8, shift);
374     return;
375   }
376 
377   // The fallbacks rely on `cpy` variants that only support merging predication.
378   // If zeroing predication was requested, zero the destination first.
379   if (pg.IsZeroing()) {
380     SingleEmissionCheckScope guard(this);
381     dup(zd, 0);
382   }
383   PRegisterM pg_m = pg.Merging();
384 
385   // Try to encode the immediate using fcpy.
386   VIXL_ASSERT(imm.FitsInLane(zd));
387   if (zd.GetLaneSizeInBits() >= kHRegSize) {
388     double fp_imm = 0.0;
389     switch (zd.GetLaneSizeInBits()) {
390       case kHRegSize:
391         fp_imm =
392             FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
393         break;
394       case kSRegSize:
395         fp_imm = RawbitsToFloat(imm.AsUint32());
396         break;
397       case kDRegSize:
398         fp_imm = RawbitsToDouble(imm.AsUint64());
399         break;
400       default:
401         VIXL_UNREACHABLE();
402         break;
403     }
404     // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
405     // we can use IsImmFP64 for all lane sizes.
406     if (IsImmFP64(fp_imm)) {
407       SingleEmissionCheckScope guard(this);
408       fcpy(zd, pg_m, fp_imm);
409       return;
410     }
411   }
412 
413   // Fall back to using a scratch register.
414   UseScratchRegisterScope temps(this);
415   Register scratch = temps.AcquireRegisterToHoldLane(zd);
416   Mov(scratch, imm);
417 
418   SingleEmissionCheckScope guard(this);
419   cpy(zd, pg_m, scratch);
420 }
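// Editorial examples of the paths through the macro above (illustrative):
//   Cpy(z0.VnD(), p0.Merging(), 256);                  // 256 == 1 << 8:
//                                                      // cpy z0.d, p0/m, #1, lsl #8
//   Cpy(z0.VnD(), p0.Merging(), 0x3ff0000000000000);   // raw bits of 1.0:
//                                                      // fcpy z0.d, p0/m, #1.0
// Immediates that fit neither form go through a scratch general-purpose register.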
421 
422 // TODO: We implement Fcpy (amongst other things) for all FP types because it
423 // allows us to preserve user-specified NaNs. We should come up with some
424 // FPImmediate type to abstract this, and avoid all the duplication below (and
425 // elsewhere).
426 
427 void MacroAssembler::Fcpy(const ZRegister& zd,
428                           const PRegisterM& pg,
429                           double imm) {
430   VIXL_ASSERT(allow_macro_instructions_);
431   VIXL_ASSERT(pg.IsMerging());
432 
433   if (IsImmFP64(imm)) {
434     SingleEmissionCheckScope guard(this);
435     fcpy(zd, pg, imm);
436     return;
437   }
438 
439   // As a fall-back, cast the immediate to the required lane size, and try to
440   // encode the bit pattern using `Cpy`.
441   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
442 }
443 
444 void MacroAssembler::Fcpy(const ZRegister& zd,
445                           const PRegisterM& pg,
446                           float imm) {
447   VIXL_ASSERT(allow_macro_instructions_);
448   VIXL_ASSERT(pg.IsMerging());
449 
450   if (IsImmFP32(imm)) {
451     SingleEmissionCheckScope guard(this);
452     fcpy(zd, pg, imm);
453     return;
454   }
455 
456   // As a fall-back, cast the immediate to the required lane size, and try to
457   // encode the bit pattern using `Cpy`.
458   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
459 }
460 
461 void MacroAssembler::Fcpy(const ZRegister& zd,
462                           const PRegisterM& pg,
463                           Float16 imm) {
464   VIXL_ASSERT(allow_macro_instructions_);
465   VIXL_ASSERT(pg.IsMerging());
466 
467   if (IsImmFP16(imm)) {
468     SingleEmissionCheckScope guard(this);
469     fcpy(zd, pg, imm);
470     return;
471   }
472 
473   // As a fall-back, cast the immediate to the required lane size, and try to
474   // encode the bit pattern using `Cpy`.
475   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
476 }
477 
478 void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
479   VIXL_ASSERT(allow_macro_instructions_);
480   VIXL_ASSERT(imm.FitsInLane(zd));
481   unsigned lane_size = zd.GetLaneSizeInBits();
482   int imm8;
483   int shift;
484   if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
485       imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
486     SingleEmissionCheckScope guard(this);
487     dup(zd, imm8, shift);
488   } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
489     SingleEmissionCheckScope guard(this);
490     dupm(zd, imm.AsUintN(lane_size));
491   } else {
492     UseScratchRegisterScope temps(this);
493     Register scratch = temps.AcquireRegisterToHoldLane(zd);
494     Mov(scratch, imm);
495 
496     SingleEmissionCheckScope guard(this);
497     dup(zd, scratch);
498   }
499 }
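// Editorial examples of the three paths above (illustrative values):
//   Dup(z0.VnH(), 0x1200);       // 0x12 << 8:          dup  z0.h, #0x12, lsl #8
//   Dup(z0.VnS(), 0xff00ff00);   // bitmask immediate:  dupm z0.s, #0xff00ff00
//   Dup(z0.VnS(), 0x12345678);   // neither form fits:  mov + dup via a scratch register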
500 
501 void MacroAssembler::NoncommutativeArithmeticHelper(
502     const ZRegister& zd,
503     const PRegisterM& pg,
504     const ZRegister& zn,
505     const ZRegister& zm,
506     SVEArithPredicatedFn fn,
507     SVEArithPredicatedFn rev_fn) {
508   if (zd.Aliases(zn)) {
509     // E.g. zd = zd / zm
510     SingleEmissionCheckScope guard(this);
511     (this->*fn)(zd, pg, zn, zm);
512   } else if (zd.Aliases(zm)) {
513     // E.g. zd = zn / zd
514     SingleEmissionCheckScope guard(this);
515     (this->*rev_fn)(zd, pg, zm, zn);
516   } else {
517     // E.g. zd = zn / zm
518     MovprfxHelperScope guard(this, zd, pg, zn);
519     (this->*fn)(zd, pg, zd, zm);
520   }
521 }
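// Editorial example of the helper above: for a non-commutative operation such
// as Fsub (zd = zn - zm), when zd aliases zm the reversed form keeps the
// operand order correct without a scratch register:
//   Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS());
//   // -> fsubr z0.s, p0/m, z0.s, z1.s   (computes z1 - z0)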
522 
523 void MacroAssembler::FPCommutativeArithmeticHelper(
524     const ZRegister& zd,
525     const PRegisterM& pg,
526     const ZRegister& zn,
527     const ZRegister& zm,
528     SVEArithPredicatedFn fn,
529     FPMacroNaNPropagationOption nan_option) {
530   ResolveFPNaNPropagationOption(&nan_option);
531 
532   if (zd.Aliases(zn)) {
533     SingleEmissionCheckScope guard(this);
534     (this->*fn)(zd, pg, zd, zm);
535   } else if (zd.Aliases(zm)) {
536     switch (nan_option) {
537       case FastNaNPropagation: {
538         // Swap the arguments.
539         SingleEmissionCheckScope guard(this);
540         (this->*fn)(zd, pg, zd, zn);
541         return;
542       }
543       case StrictNaNPropagation: {
544         UseScratchRegisterScope temps(this);
545         // Use a scratch register to keep the argument order exactly as
546         // specified.
547         ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
548         {
549           MovprfxHelperScope guard(this, scratch, pg, zn);
550           (this->*fn)(scratch, pg, scratch, zm);
551         }
552         Mov(zd, scratch);
553         return;
554       }
555       case NoFPMacroNaNPropagationSelected:
556         VIXL_UNREACHABLE();
557         return;
558     }
559   } else {
560     MovprfxHelperScope guard(this, zd, pg, zn);
561     (this->*fn)(zd, pg, zd, zm);
562   }
563 }
564 
565 // Instructions of the form "inst zda, zn, zm, #num", where they are
566 // non-commutative and no reversed form is provided.
567 #define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
568   V(Cmla, cmla)                              \
569   V(Sqrdcmlah, sqrdcmlah)
570 
571 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
572   void MacroAssembler::MASMFN(const ZRegister& zd,               \
573                               const ZRegister& za,               \
574                               const ZRegister& zn,               \
575                               const ZRegister& zm,               \
576                               int imm) {                         \
577     if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
578       UseScratchRegisterScope temps(this);                       \
579       VIXL_ASSERT(AreSameLaneSize(zn, zm));                      \
580       ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);  \
581       Mov(ztmp, zd.Aliases(zn) ? zn : zm);                       \
582       MovprfxHelperScope guard(this, zd, za);                    \
583       ASMFN(zd,                                                  \
584             (zd.Aliases(zn) ? ztmp : zn),                        \
585             (zd.Aliases(zm) ? ztmp : zm),                        \
586             imm);                                                \
587     } else {                                                     \
588       MovprfxHelperScope guard(this, zd, za);                    \
589       ASMFN(zd, zn, zm, imm);                                    \
590     }                                                            \
591   }
592 VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
593 #undef VIXL_DEFINE_MASM_FUNC
594 
595 // Instructions of the form "inst zda, zn, zm, #num, #num", where they are
596 // non-commutative and no reversed form is provided.
597 #define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
598   V(Cmla, cmla)                               \
599   V(Sqrdcmlah, sqrdcmlah)
600 
601 // This doesn't handle zm when it's out of the range that can be encoded in the
602 // instruction. The range depends on element size: z0-z7 for H, z0-z15 for S.
603 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
604   void MacroAssembler::MASMFN(const ZRegister& zd,               \
605                               const ZRegister& za,               \
606                               const ZRegister& zn,               \
607                               const ZRegister& zm,               \
608                               int index,                         \
609                               int rot) {                         \
610     if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
611       UseScratchRegisterScope temps(this);                       \
612       ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);  \
613       {                                                          \
614         MovprfxHelperScope guard(this, ztmp, za);                \
615         ASMFN(ztmp, zn, zm, index, rot);                         \
616       }                                                          \
617       Mov(zd, ztmp);                                             \
618     } else {                                                     \
619       MovprfxHelperScope guard(this, zd, za);                    \
620       ASMFN(zd, zn, zm, index, rot);                             \
621     }                                                            \
622   }
623 VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
624 #undef VIXL_DEFINE_MASM_FUNC
625 
626 // Instructions of the form "inst zda, pg, zda, zn", where they are
627 // non-commutative and no reversed form is provided.
628 #define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
629   V(Addp, addp)                             \
630   V(Faddp, faddp)                           \
631   V(Fmaxnmp, fmaxnmp)                       \
632   V(Fminnmp, fminnmp)                       \
633   V(Fmaxp, fmaxp)                           \
634   V(Fminp, fminp)                           \
635   V(Fscale, fscale)                         \
636   V(Smaxp, smaxp)                           \
637   V(Sminp, sminp)                           \
638   V(Suqadd, suqadd)                         \
639   V(Umaxp, umaxp)                           \
640   V(Uminp, uminp)                           \
641   V(Usqadd, usqadd)
642 
643 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                       \
644   void MacroAssembler::MASMFN(const ZRegister& zd,                 \
645                               const PRegisterM& pg,                \
646                               const ZRegister& zn,                 \
647                               const ZRegister& zm) {               \
648     VIXL_ASSERT(allow_macro_instructions_);                        \
649     if (zd.Aliases(zm) && !zd.Aliases(zn)) {                       \
650       UseScratchRegisterScope temps(this);                         \
651       ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
652       Mov(scratch, zm);                                            \
653       MovprfxHelperScope guard(this, zd, pg, zn);                  \
654       ASMFN(zd, pg, zd, scratch);                                  \
655     } else {                                                       \
656       MovprfxHelperScope guard(this, zd, pg, zn);                  \
657       ASMFN(zd, pg, zd, zm);                                       \
658     }                                                              \
659   }
660 VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
661 #undef VIXL_DEFINE_MASM_FUNC
662 
663 // Instructions of the form "inst zda, pg, zda, zn", where they are
664 // non-commutative and a reversed form is provided.
665 #define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
666   V(Asr, asr)                                       \
667   V(Fdiv, fdiv)                                     \
668   V(Fsub, fsub)                                     \
669   V(Lsl, lsl)                                       \
670   V(Lsr, lsr)                                       \
671   V(Sdiv, sdiv)                                     \
672   V(Shsub, shsub)                                   \
673   V(Sqrshl, sqrshl)                                 \
674   V(Sqshl, sqshl)                                   \
675   V(Sqsub, sqsub)                                   \
676   V(Srshl, srshl)                                   \
677   V(Sub, sub)                                       \
678   V(Udiv, udiv)                                     \
679   V(Uhsub, uhsub)                                   \
680   V(Uqrshl, uqrshl)                                 \
681   V(Uqshl, uqshl)                                   \
682   V(Uqsub, uqsub)                                   \
683   V(Urshl, urshl)
684 
685 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                          \
686   void MacroAssembler::MASMFN(const ZRegister& zd,                    \
687                               const PRegisterM& pg,                   \
688                               const ZRegister& zn,                    \
689                               const ZRegister& zm) {                  \
690     VIXL_ASSERT(allow_macro_instructions_);                           \
691     NoncommutativeArithmeticHelper(zd,                                \
692                                    pg,                                \
693                                    zn,                                \
694                                    zm,                                \
695                                    static_cast<SVEArithPredicatedFn>( \
696                                        &Assembler::ASMFN),            \
697                                    static_cast<SVEArithPredicatedFn>( \
698                                        &Assembler::ASMFN##r));        \
699   }
700 VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
701 #undef VIXL_DEFINE_MASM_FUNC
702 
703 void MacroAssembler::Fadd(const ZRegister& zd,
704                           const PRegisterM& pg,
705                           const ZRegister& zn,
706                           const ZRegister& zm,
707                           FPMacroNaNPropagationOption nan_option) {
708   VIXL_ASSERT(allow_macro_instructions_);
709   FPCommutativeArithmeticHelper(zd,
710                                 pg,
711                                 zn,
712                                 zm,
713                                 static_cast<SVEArithPredicatedFn>(
714                                     &Assembler::fadd),
715                                 nan_option);
716 }
717 
718 void MacroAssembler::Fabd(const ZRegister& zd,
719                           const PRegisterM& pg,
720                           const ZRegister& zn,
721                           const ZRegister& zm,
722                           FPMacroNaNPropagationOption nan_option) {
723   VIXL_ASSERT(allow_macro_instructions_);
724   FPCommutativeArithmeticHelper(zd,
725                                 pg,
726                                 zn,
727                                 zm,
728                                 static_cast<SVEArithPredicatedFn>(
729                                     &Assembler::fabd),
730                                 nan_option);
731 }
732 
733 void MacroAssembler::Fmul(const ZRegister& zd,
734                           const PRegisterM& pg,
735                           const ZRegister& zn,
736                           const ZRegister& zm,
737                           FPMacroNaNPropagationOption nan_option) {
738   VIXL_ASSERT(allow_macro_instructions_);
739   FPCommutativeArithmeticHelper(zd,
740                                 pg,
741                                 zn,
742                                 zm,
743                                 static_cast<SVEArithPredicatedFn>(
744                                     &Assembler::fmul),
745                                 nan_option);
746 }
747 
748 void MacroAssembler::Fmulx(const ZRegister& zd,
749                            const PRegisterM& pg,
750                            const ZRegister& zn,
751                            const ZRegister& zm,
752                            FPMacroNaNPropagationOption nan_option) {
753   VIXL_ASSERT(allow_macro_instructions_);
754   FPCommutativeArithmeticHelper(zd,
755                                 pg,
756                                 zn,
757                                 zm,
758                                 static_cast<SVEArithPredicatedFn>(
759                                     &Assembler::fmulx),
760                                 nan_option);
761 }
762 
763 void MacroAssembler::Fmax(const ZRegister& zd,
764                           const PRegisterM& pg,
765                           const ZRegister& zn,
766                           const ZRegister& zm,
767                           FPMacroNaNPropagationOption nan_option) {
768   VIXL_ASSERT(allow_macro_instructions_);
769   FPCommutativeArithmeticHelper(zd,
770                                 pg,
771                                 zn,
772                                 zm,
773                                 static_cast<SVEArithPredicatedFn>(
774                                     &Assembler::fmax),
775                                 nan_option);
776 }
777 
778 void MacroAssembler::Fmin(const ZRegister& zd,
779                           const PRegisterM& pg,
780                           const ZRegister& zn,
781                           const ZRegister& zm,
782                           FPMacroNaNPropagationOption nan_option) {
783   VIXL_ASSERT(allow_macro_instructions_);
784   FPCommutativeArithmeticHelper(zd,
785                                 pg,
786                                 zn,
787                                 zm,
788                                 static_cast<SVEArithPredicatedFn>(
789                                     &Assembler::fmin),
790                                 nan_option);
791 }
792 
793 void MacroAssembler::Fmaxnm(const ZRegister& zd,
794                             const PRegisterM& pg,
795                             const ZRegister& zn,
796                             const ZRegister& zm,
797                             FPMacroNaNPropagationOption nan_option) {
798   VIXL_ASSERT(allow_macro_instructions_);
799   FPCommutativeArithmeticHelper(zd,
800                                 pg,
801                                 zn,
802                                 zm,
803                                 static_cast<SVEArithPredicatedFn>(
804                                     &Assembler::fmaxnm),
805                                 nan_option);
806 }
807 
808 void MacroAssembler::Fminnm(const ZRegister& zd,
809                             const PRegisterM& pg,
810                             const ZRegister& zn,
811                             const ZRegister& zm,
812                             FPMacroNaNPropagationOption nan_option) {
813   VIXL_ASSERT(allow_macro_instructions_);
814   FPCommutativeArithmeticHelper(zd,
815                                 pg,
816                                 zn,
817                                 zm,
818                                 static_cast<SVEArithPredicatedFn>(
819                                     &Assembler::fminnm),
820                                 nan_option);
821 }
822 
823 void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
824   VIXL_ASSERT(allow_macro_instructions_);
825 
826   switch (zd.GetLaneSizeInBits()) {
827     case kHRegSize:
828       Fdup(zd, Float16(imm));
829       break;
830     case kSRegSize:
831       Fdup(zd, static_cast<float>(imm));
832       break;
833     case kDRegSize:
834       if (IsImmFP64(imm)) {
835         SingleEmissionCheckScope guard(this);
836         fdup(zd, imm);
837       } else {
838         Dup(zd, DoubleToRawbits(imm));
839       }
840       break;
841   }
842 }
843 
844 void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
845   VIXL_ASSERT(allow_macro_instructions_);
846 
847   switch (zd.GetLaneSizeInBits()) {
848     case kHRegSize:
849       Fdup(zd, Float16(imm));
850       break;
851     case kSRegSize:
852       if (IsImmFP32(imm)) {
853         SingleEmissionCheckScope guard(this);
854         fdup(zd, imm);
855       } else {
856         Dup(zd, FloatToRawbits(imm));
857       }
858       break;
859     case kDRegSize:
860       Fdup(zd, static_cast<double>(imm));
861       break;
862   }
863 }
864 
865 void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
866   VIXL_ASSERT(allow_macro_instructions_);
867 
868   switch (zd.GetLaneSizeInBits()) {
869     case kHRegSize:
870       if (IsImmFP16(imm)) {
871         SingleEmissionCheckScope guard(this);
872         fdup(zd, imm);
873       } else {
874         Dup(zd, Float16ToRawbits(imm));
875       }
876       break;
877     case kSRegSize:
878       Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
879       break;
880     case kDRegSize:
881       Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
882       break;
883   }
884 }
885 
886 void MacroAssembler::Index(const ZRegister& zd,
887                            const Operand& start,
888                            const Operand& step) {
889   class IndexOperand : public Operand {
890    public:
891     static IndexOperand Prepare(MacroAssembler* masm,
892                                 UseScratchRegisterScope* temps,
893                                 const Operand& op,
894                                 const ZRegister& zd_inner) {
895       // Look for encodable immediates.
896       int imm;
897       if (op.IsImmediate()) {
898         if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
899           return IndexOperand(imm);
900         }
901         Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
902         masm->Mov(scratch, op);
903         return IndexOperand(scratch);
904       } else {
905         // Plain registers can be encoded directly.
906         VIXL_ASSERT(op.IsPlainRegister());
907         return IndexOperand(op.GetRegister());
908       }
909     }
910 
911     int GetImm5() const {
912       int64_t imm = GetImmediate();
913       VIXL_ASSERT(IsInt5(imm));
914       return static_cast<int>(imm);
915     }
916 
917    private:
918     explicit IndexOperand(const Register& reg) : Operand(reg) {}
919     explicit IndexOperand(int64_t imm) : Operand(imm) {}
920   };
921 
922   UseScratchRegisterScope temps(this);
923   IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
924   IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);
925 
926   SingleEmissionCheckScope guard(this);
927   if (start_enc.IsImmediate()) {
928     if (step_enc.IsImmediate()) {
929       index(zd, start_enc.GetImm5(), step_enc.GetImm5());
930     } else {
931       index(zd, start_enc.GetImm5(), step_enc.GetRegister());
932     }
933   } else {
934     if (step_enc.IsImmediate()) {
935       index(zd, start_enc.GetRegister(), step_enc.GetImm5());
936     } else {
937       index(zd, start_enc.GetRegister(), step_enc.GetRegister());
938     }
939   }
940 }
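// Editorial examples of the encodings chosen above (illustrative):
//   Index(z0.VnB(), 0, 1);     // both operands fit in int5:  index z0.b, #0, #1
//   Index(z0.VnD(), x1, 42);   // 42 is outside the int5 range, so it is moved
//                              // into a scratch register first:
//                              //   mov xTmp, #42; index z0.d, x1, xTmp
// The scratch name xTmp is a placeholder.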
941 
942 void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
943   VIXL_ASSERT(allow_macro_instructions_);
944   VIXL_ASSERT(imm.FitsInLane(zdn));
945 
946   if (imm.IsZero()) {
947     SingleEmissionCheckScope guard(this);
948     insr(zdn, xzr);
949     return;
950   }
951 
952   UseScratchRegisterScope temps(this);
953   Register scratch = temps.AcquireRegisterToHoldLane(zdn);
954 
955   // TODO: There are many cases where we could optimise immediates, such as by
956   // detecting repeating patterns or FP immediates. We should optimise and
957   // abstract this for use in other SVE mov-immediate-like macros.
958   Mov(scratch, imm);
959 
960   SingleEmissionCheckScope guard(this);
961   insr(zdn, scratch);
962 }
963 
964 void MacroAssembler::Mla(const ZRegister& zd,
965                          const PRegisterM& pg,
966                          const ZRegister& za,
967                          const ZRegister& zn,
968                          const ZRegister& zm) {
969   VIXL_ASSERT(allow_macro_instructions_);
970   if (zd.Aliases(za)) {
971     // zda = zda + (zn * zm)
972     SingleEmissionCheckScope guard(this);
973     mla(zd, pg, zn, zm);
974   } else if (zd.Aliases(zn)) {
975     // zdn = za + (zdn * zm)
976     SingleEmissionCheckScope guard(this);
977     mad(zd, pg, zm, za);
978   } else if (zd.Aliases(zm)) {
979     // Multiplication is commutative, so we can swap zn and zm.
980     // zdm = za + (zdm * zn)
981     SingleEmissionCheckScope guard(this);
982     mad(zd, pg, zn, za);
983   } else {
984     // zd = za + (zn * zm)
985     ExactAssemblyScope guard(this, 2 * kInstructionSize);
986     movprfx(zd, pg, za);
987     mla(zd, pg, zn, zm);
988   }
989 }
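// Editorial sketch of the non-aliasing case above:
//   Mla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS(), z3.VnS());
//   // -> movprfx z0.s, p0/m, z1.s
//   //    mla     z0.s, p0/m, z2.s, z3.s
// i.e. z0 = z1 + (z2 * z3), with the accumulator copied into the destination first.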
990 
991 void MacroAssembler::Mls(const ZRegister& zd,
992                          const PRegisterM& pg,
993                          const ZRegister& za,
994                          const ZRegister& zn,
995                          const ZRegister& zm) {
996   VIXL_ASSERT(allow_macro_instructions_);
997   if (zd.Aliases(za)) {
998     // zda = zda - (zn * zm)
999     SingleEmissionCheckScope guard(this);
1000     mls(zd, pg, zn, zm);
1001   } else if (zd.Aliases(zn)) {
1002     // zdn = za - (zdn * zm)
1003     SingleEmissionCheckScope guard(this);
1004     msb(zd, pg, zm, za);
1005   } else if (zd.Aliases(zm)) {
1006     // Multiplication is commutative, so we can swap zn and zm.
1007     // zdm = za - (zdm * zn)
1008     SingleEmissionCheckScope guard(this);
1009     msb(zd, pg, zn, za);
1010   } else {
1011     // zd = za - (zn * zm)
1012     ExactAssemblyScope guard(this, 2 * kInstructionSize);
1013     movprfx(zd, pg, za);
1014     mls(zd, pg, zn, zm);
1015   }
1016 }
1017 
1018 void MacroAssembler::CompareHelper(Condition cond,
1019                                    const PRegisterWithLaneSize& pd,
1020                                    const PRegisterZ& pg,
1021                                    const ZRegister& zn,
1022                                    IntegerOperand imm) {
1023   UseScratchRegisterScope temps(this);
1024   ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1025   Dup(zm, imm);
1026   SingleEmissionCheckScope guard(this);
1027   cmp(cond, pd, pg, zn, zm);
1028 }
1029 
1030 void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
1031                             const PRegister& pg,
1032                             const PRegisterWithLaneSize& pn) {
1033   VIXL_ASSERT(allow_macro_instructions_);
1034   VIXL_ASSERT(pd.IsLaneSizeB());
1035   VIXL_ASSERT(pn.IsLaneSizeB());
1036   if (pd.Is(pn)) {
1037     SingleEmissionCheckScope guard(this);
1038     pfirst(pd, pg, pn);
1039   } else {
1040     UseScratchRegisterScope temps(this);
1041     PRegister temp_pg = pg;
1042     if (pd.Aliases(pg)) {
1043       temp_pg = temps.AcquireP();
1044       Mov(temp_pg.VnB(), pg.VnB());
1045     }
1046     Mov(pd, pn);
1047     SingleEmissionCheckScope guard(this);
1048     pfirst(pd, temp_pg, pd);
1049   }
1050 }
1051 
1052 void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
1053                            const PRegister& pg,
1054                            const PRegisterWithLaneSize& pn) {
1055   VIXL_ASSERT(allow_macro_instructions_);
1056   VIXL_ASSERT(AreSameFormat(pd, pn));
1057   if (pd.Is(pn)) {
1058     SingleEmissionCheckScope guard(this);
1059     pnext(pd, pg, pn);
1060   } else {
1061     UseScratchRegisterScope temps(this);
1062     PRegister temp_pg = pg;
1063     if (pd.Aliases(pg)) {
1064       temp_pg = temps.AcquireP();
1065       Mov(temp_pg.VnB(), pg.VnB());
1066     }
1067     Mov(pd.VnB(), pn.VnB());
1068     SingleEmissionCheckScope guard(this);
1069     pnext(pd, temp_pg, pd);
1070   }
1071 }
1072 
1073 void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
1074                            SVEPredicateConstraint pattern,
1075                            FlagsUpdate s) {
1076   VIXL_ASSERT(allow_macro_instructions_);
1077   switch (s) {
1078     case LeaveFlags:
1079       Ptrue(pd, pattern);
1080       return;
1081     case SetFlags:
1082       Ptrues(pd, pattern);
1083       return;
1084   }
1085   VIXL_UNREACHABLE();
1086 }
1087 
1088 void MacroAssembler::Sub(const ZRegister& zd,
1089                          IntegerOperand imm,
1090                          const ZRegister& zm) {
1091   VIXL_ASSERT(allow_macro_instructions_);
1092 
1093   int imm8;
1094   int shift = -1;
1095   if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
1096       imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
1097     MovprfxHelperScope guard(this, zd, zm);
1098     subr(zd, zd, imm8, shift);
1099   } else {
1100     UseScratchRegisterScope temps(this);
1101     ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
1102     Dup(scratch, imm);
1103 
1104     SingleEmissionCheckScope guard(this);
1105     sub(zd, scratch, zm);
1106   }
1107 }
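// Editorial sketch of the reversed-subtract macro above (illustrative):
//   Sub(z0.VnB(), 100, z1.VnB());    // 100 fits in a shifted uint8:
//   // -> movprfx z0, z1             (elided if z0 and z1 are the same register)
//   //    subr    z0.b, z0.b, #100   (computes 100 - z1)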
1108 
1109 void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
1110                                                const PRegisterZ& pg,
1111                                                const SVEMemOperand& addr,
1112                                                SVELoadBroadcastFn fn,
1113                                                int divisor) {
1114   VIXL_ASSERT(addr.IsScalarPlusImmediate());
1115   int64_t imm = addr.GetImmediateOffset();
1116   if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
1117     SingleEmissionCheckScope guard(this);
1118     (this->*fn)(zt, pg, addr);
1119   } else {
1120     UseScratchRegisterScope temps(this);
1121     Register scratch = temps.AcquireX();
1122     CalculateSVEAddress(scratch, addr, zt);
1123     SingleEmissionCheckScope guard(this);
1124     (this->*fn)(zt, pg, SVEMemOperand(scratch));
1125   }
1126 }
1127 
1128 void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
1129                                                  const SVEMemOperand& addr,
1130                                                  SVELoadStoreFn fn) {
1131   VIXL_ASSERT(allow_macro_instructions_);
1132   VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());
1133 
1134   if (addr.IsPlainScalar() ||
1135       (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
1136        addr.IsMulVl())) {
1137     SingleEmissionCheckScope guard(this);
1138     (this->*fn)(rt, addr);
1139     return;
1140   }
1141 
1142   if (addr.IsEquivalentToScalar()) {
1143     SingleEmissionCheckScope guard(this);
1144     (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
1145     return;
1146   }
1147 
1148   UseScratchRegisterScope temps(this);
1149   Register scratch = temps.AcquireX();
1150   CalculateSVEAddress(scratch, addr, rt);
1151   SingleEmissionCheckScope guard(this);
1152   (this->*fn)(rt, SVEMemOperand(scratch));
1153 }
1154 
1155 template <typename Tg, typename Tf>
1156 void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
1157     const ZRegister& zt,
1158     const Tg& pg,
1159     const SVEMemOperand& addr,
1160     Tf fn,
1161     int imm_bits,
1162     int shift_amount,
1163     SVEOffsetModifier supported_modifier,
1164     int vl_divisor_log2) {
1165   VIXL_ASSERT(allow_macro_instructions_);
1166   int imm_divisor = 1 << shift_amount;
1167 
1168   if (addr.IsPlainScalar() ||
1169       (addr.IsScalarPlusImmediate() &&
1170        IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
1171        ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
1172        (addr.GetOffsetModifier() == supported_modifier))) {
1173     SingleEmissionCheckScope guard(this);
1174     (this->*fn)(zt, pg, addr);
1175     return;
1176   }
1177 
1178   if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1179       addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
1180     SingleEmissionCheckScope guard(this);
1181     (this->*fn)(zt, pg, addr);
1182     return;
1183   }
1184 
1185   if (addr.IsEquivalentToScalar()) {
1186     SingleEmissionCheckScope guard(this);
1187     (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1188     return;
1189   }
1190 
1191   if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
1192       (vl_divisor_log2 == -1)) {
1193     // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL
1194     // dependent.
1195     VIXL_UNIMPLEMENTED();
1196   }
1197 
1198   UseScratchRegisterScope temps(this);
1199   Register scratch = temps.AcquireX();
1200   CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1201   SingleEmissionCheckScope guard(this);
1202   (this->*fn)(zt, pg, SVEMemOperand(scratch));
1203 }
1204 
1205 template <typename Tg, typename Tf>
1206 void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
1207                                          const ZRegister& zt,
1208                                          const Tg& pg,
1209                                          const SVEMemOperand& addr,
1210                                          Tf fn) {
1211   if (addr.IsPlainScalar() ||
1212       (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1213        addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
1214       (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
1215        addr.IsMulVl())) {
1216     SingleEmissionCheckScope guard(this);
1217     (this->*fn)(zt, pg, addr);
1218     return;
1219   }
1220 
1221   if (addr.IsEquivalentToScalar()) {
1222     SingleEmissionCheckScope guard(this);
1223     (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1224     return;
1225   }
1226 
1227   if (addr.IsVectorPlusImmediate()) {
1228     uint64_t offset = addr.GetImmediateOffset();
1229     if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
1230         IsUint5(offset >> msize_in_bytes_log2)) {
1231       SingleEmissionCheckScope guard(this);
1232       (this->*fn)(zt, pg, addr);
1233       return;
1234     }
1235   }
1236 
1237   if (addr.IsScalarPlusVector()) {
1238     VIXL_ASSERT(addr.IsScatterGather());
1239     SingleEmissionCheckScope guard(this);
1240     (this->*fn)(zt, pg, addr);
1241     return;
1242   }
1243 
1244   UseScratchRegisterScope temps(this);
1245   if (addr.IsScatterGather()) {
1246     // In scatter-gather modes, zt and zn/zm have the same lane size. However,
1247     // for 32-bit accesses, the result of each lane's address calculation still
1248     // requires 64 bits; we can't naively use `Adr` for the address calculation
1249     // because it would truncate each address to 32 bits.
1250 
1251     if (addr.IsVectorPlusImmediate()) {
1252       // Synthesise the immediate in an X register, then use a
1253       // scalar-plus-vector access with the original vector.
1254       Register scratch = temps.AcquireX();
1255       Mov(scratch, addr.GetImmediateOffset());
1256       SingleEmissionCheckScope guard(this);
1257       SVEOffsetModifier om =
1258           zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
1259       (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
1260       return;
1261     }
1262 
1263     VIXL_UNIMPLEMENTED();
1264   } else {
1265     Register scratch = temps.AcquireX();
1266     // TODO: If we have an immediate offset that is a multiple of
1267     // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
1268     // save an instruction.
1269     int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
1270     CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1271     SingleEmissionCheckScope guard(this);
1272     (this->*fn)(zt, pg, SVEMemOperand(scratch));
1273   }
1274 }
1275 
1276 template <typename Tf>
1277 void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
1278                                      const ZRegister& zt,
1279                                      const PRegisterZ& pg,
1280                                      const SVEMemOperand& addr,
1281                                      Tf fn) {
1282   if (addr.IsScatterGather()) {
1283     // Scatter-gather first-fault loads share encodings with normal loads.
1284     SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
1285     return;
1286   }
1287 
1288   // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
1289   // so we don't do immediate synthesis.
1290 
1291   // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
1292   // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
1293   if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
1294                                addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
1295     SingleEmissionCheckScope guard(this);
1296     (this->*fn)(zt, pg, addr);
1297     return;
1298   }
1299 
1300   VIXL_UNIMPLEMENTED();
1301 }
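// Example of the forms accepted above (a sketch added for clarity, not
// original text): contiguous first-faulting loads only take a scalar base or
// an LSL-scaled scalar-plus-scalar operand, so a call such as
//   Ldff1b(z1.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
// is emitted directly, whereas a scalar-plus-immediate operand reaches
// VIXL_UNIMPLEMENTED().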
1302 
1303 void MacroAssembler::Ld1b(const ZRegister& zt,
1304                           const PRegisterZ& pg,
1305                           const SVEMemOperand& addr) {
1306   VIXL_ASSERT(allow_macro_instructions_);
1307   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1308                       zt,
1309                       pg,
1310                       addr,
1311                       static_cast<SVELoad1Fn>(&Assembler::ld1b));
1312 }
1313 
1314 void MacroAssembler::Ld1h(const ZRegister& zt,
1315                           const PRegisterZ& pg,
1316                           const SVEMemOperand& addr) {
1317   VIXL_ASSERT(allow_macro_instructions_);
1318   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1319                       zt,
1320                       pg,
1321                       addr,
1322                       static_cast<SVELoad1Fn>(&Assembler::ld1h));
1323 }
1324 
1325 void MacroAssembler::Ld1w(const ZRegister& zt,
1326                           const PRegisterZ& pg,
1327                           const SVEMemOperand& addr) {
1328   VIXL_ASSERT(allow_macro_instructions_);
1329   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1330                       zt,
1331                       pg,
1332                       addr,
1333                       static_cast<SVELoad1Fn>(&Assembler::ld1w));
1334 }
1335 
1336 void MacroAssembler::Ld1d(const ZRegister& zt,
1337                           const PRegisterZ& pg,
1338                           const SVEMemOperand& addr) {
1339   VIXL_ASSERT(allow_macro_instructions_);
1340   SVELoadStore1Helper(kDRegSizeInBytesLog2,
1341                       zt,
1342                       pg,
1343                       addr,
1344                       static_cast<SVELoad1Fn>(&Assembler::ld1d));
1345 }
1346 
1347 void MacroAssembler::Ld1sb(const ZRegister& zt,
1348                            const PRegisterZ& pg,
1349                            const SVEMemOperand& addr) {
1350   VIXL_ASSERT(allow_macro_instructions_);
1351   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1352                       zt,
1353                       pg,
1354                       addr,
1355                       static_cast<SVELoad1Fn>(&Assembler::ld1sb));
1356 }
1357 
1358 void MacroAssembler::Ld1sh(const ZRegister& zt,
1359                            const PRegisterZ& pg,
1360                            const SVEMemOperand& addr) {
1361   VIXL_ASSERT(allow_macro_instructions_);
1362   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1363                       zt,
1364                       pg,
1365                       addr,
1366                       static_cast<SVELoad1Fn>(&Assembler::ld1sh));
1367 }
1368 
1369 void MacroAssembler::Ld1sw(const ZRegister& zt,
1370                            const PRegisterZ& pg,
1371                            const SVEMemOperand& addr) {
1372   VIXL_ASSERT(allow_macro_instructions_);
1373   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1374                       zt,
1375                       pg,
1376                       addr,
1377                       static_cast<SVELoad1Fn>(&Assembler::ld1sw));
1378 }
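// Usage note (illustrative addition; operands are assumptions): the first
// argument to SVELoadStore1Helper is the memory element size, which may be
// narrower than the lane size of zt. For example,
//   Ld1sb(z2.VnD(), p1.Zeroing(), SVEMemOperand(x0));
// loads one byte per doubleword lane and sign-extends it to 64 bits.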
1379 
1380 void MacroAssembler::St1b(const ZRegister& zt,
1381                           const PRegister& pg,
1382                           const SVEMemOperand& addr) {
1383   VIXL_ASSERT(allow_macro_instructions_);
1384   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1385                       zt,
1386                       pg,
1387                       addr,
1388                       static_cast<SVEStore1Fn>(&Assembler::st1b));
1389 }
1390 
1391 void MacroAssembler::St1h(const ZRegister& zt,
1392                           const PRegister& pg,
1393                           const SVEMemOperand& addr) {
1394   VIXL_ASSERT(allow_macro_instructions_);
1395   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1396                       zt,
1397                       pg,
1398                       addr,
1399                       static_cast<SVEStore1Fn>(&Assembler::st1h));
1400 }
1401 
1402 void MacroAssembler::St1w(const ZRegister& zt,
1403                           const PRegister& pg,
1404                           const SVEMemOperand& addr) {
1405   VIXL_ASSERT(allow_macro_instructions_);
1406   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1407                       zt,
1408                       pg,
1409                       addr,
1410                       static_cast<SVEStore1Fn>(&Assembler::st1w));
1411 }
1412 
1413 void MacroAssembler::St1d(const ZRegister& zt,
1414                           const PRegister& pg,
1415                           const SVEMemOperand& addr) {
1416   VIXL_ASSERT(allow_macro_instructions_);
1417   SVELoadStore1Helper(kDRegSizeInBytesLog2,
1418                       zt,
1419                       pg,
1420                       addr,
1421                       static_cast<SVEStore1Fn>(&Assembler::st1d));
1422 }
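// Note (an addition, not original): the St1* wrappers take a plain PRegister
// because stores have no zeroing/merging distinction. For example,
//   St1b(z3.VnH(), p2, SVEMemOperand(x1, 7, SVE_MUL_VL));
// truncates each halfword lane to a byte before storing it.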
1423 
1424 void MacroAssembler::Ldff1b(const ZRegister& zt,
1425                             const PRegisterZ& pg,
1426                             const SVEMemOperand& addr) {
1427   VIXL_ASSERT(allow_macro_instructions_);
1428   SVELoadFFHelper(kBRegSizeInBytesLog2,
1429                   zt,
1430                   pg,
1431                   addr,
1432                   static_cast<SVELoad1Fn>(&Assembler::ldff1b));
1433 }
1434 
1435 void MacroAssembler::Ldff1h(const ZRegister& zt,
1436                             const PRegisterZ& pg,
1437                             const SVEMemOperand& addr) {
1438   VIXL_ASSERT(allow_macro_instructions_);
1439   SVELoadFFHelper(kHRegSizeInBytesLog2,
1440                   zt,
1441                   pg,
1442                   addr,
1443                   static_cast<SVELoad1Fn>(&Assembler::ldff1h));
1444 }
1445 
1446 void MacroAssembler::Ldff1w(const ZRegister& zt,
1447                             const PRegisterZ& pg,
1448                             const SVEMemOperand& addr) {
1449   VIXL_ASSERT(allow_macro_instructions_);
1450   SVELoadFFHelper(kSRegSizeInBytesLog2,
1451                   zt,
1452                   pg,
1453                   addr,
1454                   static_cast<SVELoad1Fn>(&Assembler::ldff1w));
1455 }
1456 
1457 void MacroAssembler::Ldff1d(const ZRegister& zt,
1458                             const PRegisterZ& pg,
1459                             const SVEMemOperand& addr) {
1460   VIXL_ASSERT(allow_macro_instructions_);
1461   SVELoadFFHelper(kDRegSizeInBytesLog2,
1462                   zt,
1463                   pg,
1464                   addr,
1465                   static_cast<SVELoad1Fn>(&Assembler::ldff1d));
1466 }
1467 
1468 void MacroAssembler::Ldff1sb(const ZRegister& zt,
1469                              const PRegisterZ& pg,
1470                              const SVEMemOperand& addr) {
1471   VIXL_ASSERT(allow_macro_instructions_);
1472   SVELoadFFHelper(kBRegSizeInBytesLog2,
1473                   zt,
1474                   pg,
1475                   addr,
1476                   static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
1477 }
1478 
1479 void MacroAssembler::Ldff1sh(const ZRegister& zt,
1480                              const PRegisterZ& pg,
1481                              const SVEMemOperand& addr) {
1482   VIXL_ASSERT(allow_macro_instructions_);
1483   SVELoadFFHelper(kHRegSizeInBytesLog2,
1484                   zt,
1485                   pg,
1486                   addr,
1487                   static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
1488 }
1489 
1490 void MacroAssembler::Ldff1sw(const ZRegister& zt,
1491                              const PRegisterZ& pg,
1492                              const SVEMemOperand& addr) {
1493   VIXL_ASSERT(allow_macro_instructions_);
1494   SVELoadFFHelper(kSRegSizeInBytesLog2,
1495                   zt,
1496                   pg,
1497                   addr,
1498                   static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
1499 }
1500 
1501 #define VIXL_SVE_LD1R_LIST(V) \
1502   V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
1503 
1504 #define VIXL_DEFINE_MASM_FUNC(SZ, SH)                          \
1505   void MacroAssembler::Ld1r##SZ(const ZRegister& zt,           \
1506                                 const PRegisterZ& pg,          \
1507                                 const SVEMemOperand& addr) {   \
1508     VIXL_ASSERT(allow_macro_instructions_);                    \
1509     SVELoadStoreNTBroadcastQOHelper(zt,                        \
1510                                     pg,                        \
1511                                     addr,                      \
1512                                     &MacroAssembler::ld1r##SZ, \
1513                                     4,                         \
1514                                     SH,                        \
1515                                     NO_SVE_OFFSET_MODIFIER,    \
1516                                     -1);                       \
1517   }
1518 
1519 VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
1520 
1521 #undef VIXL_DEFINE_MASM_FUNC
1522 #undef VIXL_SVE_LD1R_LIST
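// For reference (a sketch of the expansion, not part of the original file),
// the (qb, 4) entry in the list above generates:
//
//   void MacroAssembler::Ld1rqb(const ZRegister& zt,
//                               const PRegisterZ& pg,
//                               const SVEMemOperand& addr) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ld1rqb,
//                                     4, 4, NO_SVE_OFFSET_MODIFIER, -1);
//   }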
1523 
1524 void MacroAssembler::Ldnt1b(const ZRegister& zt,
1525                             const PRegisterZ& pg,
1526                             const SVEMemOperand& addr) {
1527   VIXL_ASSERT(allow_macro_instructions_);
1528   if (addr.IsVectorPlusScalar()) {
1529     SingleEmissionCheckScope guard(this);
1530     ldnt1b(zt, pg, addr);
1531   } else {
1532     SVELoadStoreNTBroadcastQOHelper(zt,
1533                                     pg,
1534                                     addr,
1535                                     &MacroAssembler::ldnt1b,
1536                                     4,
1537                                     0,
1538                                     SVE_MUL_VL);
1539   }
1540 }
1541 
1542 void MacroAssembler::Ldnt1d(const ZRegister& zt,
1543                             const PRegisterZ& pg,
1544                             const SVEMemOperand& addr) {
1545   VIXL_ASSERT(allow_macro_instructions_);
1546   if (addr.IsVectorPlusScalar()) {
1547     SingleEmissionCheckScope guard(this);
1548     ldnt1d(zt, pg, addr);
1549   } else {
1550     SVELoadStoreNTBroadcastQOHelper(zt,
1551                                     pg,
1552                                     addr,
1553                                     &MacroAssembler::ldnt1d,
1554                                     4,
1555                                     0,
1556                                     SVE_MUL_VL);
1557   }
1558 }
1559 
1560 void MacroAssembler::Ldnt1h(const ZRegister& zt,
1561                             const PRegisterZ& pg,
1562                             const SVEMemOperand& addr) {
1563   VIXL_ASSERT(allow_macro_instructions_);
1564   if (addr.IsVectorPlusScalar()) {
1565     SingleEmissionCheckScope guard(this);
1566     ldnt1h(zt, pg, addr);
1567   } else {
1568     SVELoadStoreNTBroadcastQOHelper(zt,
1569                                     pg,
1570                                     addr,
1571                                     &MacroAssembler::ldnt1h,
1572                                     4,
1573                                     0,
1574                                     SVE_MUL_VL);
1575   }
1576 }
1577 
1578 void MacroAssembler::Ldnt1w(const ZRegister& zt,
1579                             const PRegisterZ& pg,
1580                             const SVEMemOperand& addr) {
1581   VIXL_ASSERT(allow_macro_instructions_);
1582   if (addr.IsVectorPlusScalar()) {
1583     SingleEmissionCheckScope guard(this);
1584     ldnt1w(zt, pg, addr);
1585   } else {
1586     SVELoadStoreNTBroadcastQOHelper(zt,
1587                                     pg,
1588                                     addr,
1589                                     &MacroAssembler::ldnt1w,
1590                                     4,
1591                                     0,
1592                                     SVE_MUL_VL);
1593   }
1594 }
1595 
1596 void MacroAssembler::Stnt1b(const ZRegister& zt,
1597                             const PRegister& pg,
1598                             const SVEMemOperand& addr) {
1599   VIXL_ASSERT(allow_macro_instructions_);
1600   if (addr.IsVectorPlusScalar()) {
1601     SingleEmissionCheckScope guard(this);
1602     stnt1b(zt, pg, addr);
1603   } else {
1604     SVELoadStoreNTBroadcastQOHelper(zt,
1605                                     pg,
1606                                     addr,
1607                                     &MacroAssembler::stnt1b,
1608                                     4,
1609                                     0,
1610                                     SVE_MUL_VL);
1611   }
1612 }
1613 void MacroAssembler::Stnt1d(const ZRegister& zt,
1614                             const PRegister& pg,
1615                             const SVEMemOperand& addr) {
1616   VIXL_ASSERT(allow_macro_instructions_);
1617   if (addr.IsVectorPlusScalar()) {
1618     SingleEmissionCheckScope guard(this);
1619     stnt1d(zt, pg, addr);
1620   } else {
1621     SVELoadStoreNTBroadcastQOHelper(zt,
1622                                     pg,
1623                                     addr,
1624                                     &MacroAssembler::stnt1d,
1625                                     4,
1626                                     0,
1627                                     SVE_MUL_VL);
1628   }
1629 }
1630 void MacroAssembler::Stnt1h(const ZRegister& zt,
1631                             const PRegister& pg,
1632                             const SVEMemOperand& addr) {
1633   VIXL_ASSERT(allow_macro_instructions_);
1634   if (addr.IsVectorPlusScalar()) {
1635     SingleEmissionCheckScope guard(this);
1636     stnt1h(zt, pg, addr);
1637   } else {
1638     SVELoadStoreNTBroadcastQOHelper(zt,
1639                                     pg,
1640                                     addr,
1641                                     &MacroAssembler::stnt1h,
1642                                     4,
1643                                     0,
1644                                     SVE_MUL_VL);
1645   }
1646 }
1647 void MacroAssembler::Stnt1w(const ZRegister& zt,
1648                             const PRegister& pg,
1649                             const SVEMemOperand& addr) {
1650   VIXL_ASSERT(allow_macro_instructions_);
1651   if (addr.IsVectorPlusScalar()) {
1652     SingleEmissionCheckScope guard(this);
1653     stnt1w(zt, pg, addr);
1654   } else {
1655     SVELoadStoreNTBroadcastQOHelper(zt,
1656                                     pg,
1657                                     addr,
1658                                     &MacroAssembler::stnt1w,
1659                                     4,
1660                                     0,
1661                                     SVE_MUL_VL);
1662   }
1663 }
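// Usage note (illustrative addition; operands are assumptions): the
// non-temporal forms above emit vector-plus-scalar operands (SVE2) directly,
// while scalar-based operands go through the broadcast/NT helper, e.g.
//   Ldnt1d(z4.VnD(), p0.Zeroing(), SVEMemOperand(x2, 2, SVE_MUL_VL));
// which is encodable as a single ldnt1d with a MUL VL immediate.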
1664 
1665 void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
1666                                        const ZRegister& zd,
1667                                        const ZRegister& za,
1668                                        const ZRegister& zn,
1669                                        const ZRegister& zm,
1670                                        int index) {
1671   if (zd.Aliases(za)) {
1672     // zda = zda + (zn . zm)
1673     SingleEmissionCheckScope guard(this);
1674     (this->*fn)(zd, zn, zm, index);
1675 
1676   } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
1677     // zdn = za + (zdn . zm[index])
1678     // zdm = za + (zn . zdm[index])
1679     // zdnm = za + (zdnm . zdnm[index])
1680     UseScratchRegisterScope temps(this);
1681     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1682     {
1683       MovprfxHelperScope guard(this, scratch, za);
1684       (this->*fn)(scratch, zn, zm, index);
1685     }
1686 
1687     Mov(zd, scratch);
1688   } else {
1689     // zd = za + (zn . zm)
1690     MovprfxHelperScope guard(this, zd, za);
1691     (this->*fn)(zd, zn, zm, index);
1692   }
1693 }
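// Sketch of the dispatch above (added for clarity, not original): in
//   Sdot(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB(), 1);
// zd aliases za, so the indexed sdot is emitted directly. If zd instead
// aliased zn or zm, the result would be built in a scratch register under
// movprfx and then copied back with Mov.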
1694 
1695 void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
1696                                               const ZRegister& zd,
1697                                               const ZRegister& za,
1698                                               const ZRegister& zn,
1699                                               const ZRegister& zm) {
1700   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1701     // zd = za . zd . zm
1702     // zd = za . zn . zd
1703     // zd = za . zd . zd
1704     UseScratchRegisterScope temps(this);
1705     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1706     {
1707       MovprfxHelperScope guard(this, scratch, za);
1708       (this->*fn)(scratch, zn, zm);
1709     }
1710 
1711     Mov(zd, scratch);
1712   } else {
1713     MovprfxHelperScope guard(this, zd, za);
1714     (this->*fn)(zd, zn, zm);
1715   }
1716 }
1717 
1718 void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
1719                                               const ZRegister& zd,
1720                                               const ZRegister& za,
1721                                               const ZRegister& zn,
1722                                               const ZRegister& zm) {
1723   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1724     // zd = za . zd . zm
1725     // zd = za . zn . zd
1726     // zd = za . zd . zd
1727     UseScratchRegisterScope temps(this);
1728     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1729     {
1730       MovprfxHelperScope guard(this, scratch, za);
1731       (this->*fn)(scratch, scratch, zn, zm);
1732     }
1733 
1734     Mov(zd, scratch);
1735   } else {
1736     MovprfxHelperScope guard(this, zd, za);
1737     (this->*fn)(zd, zd, zn, zm);
1738   }
1739 }
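// Illustrative example (not in the original source) of the aliasing rule used
// by both FourRegDestructiveHelper overloads:
//   Eor3(z0.VnD(), z1.VnD(), z0.VnD(), z2.VnD());
// Here zd (z0) aliases zn but not za (z1), so the operation is performed in a
// scratch register prefixed from za and the result is moved into z0.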
1740 
1741 void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
1742                                                     const ZRegister& zd,
1743                                                     const ZRegister& za,
1744                                                     const ZRegister& zn,
1745                                                     const ZRegister& zm,
1746                                                     int imm) {
1747   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1748     // zd = za . zd . zm[i]
1749     // zd = za . zn . zd[i]
1750     // zd = za . zd . zd[i]
1751     UseScratchRegisterScope temps(this);
1752     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1753     {
1754       MovprfxHelperScope guard(this, scratch, za);
1755       (this->*fn)(scratch, zn, zm, imm);
1756     }
1757 
1758     Mov(zd, scratch);
1759   } else {
1760     // zd = za . zn . zm[i]
1761     MovprfxHelperScope guard(this, zd, za);
1762     (this->*fn)(zd, zn, zm, imm);
1763   }
1764 }
1765 
1766 void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
1767                                                   const ZRegister& zd,
1768                                                   const ZRegister& za,
1769                                                   const ZRegister& zn,
1770                                                   const ZRegister& zm) {
1771   if (zn.Aliases(zm)) {
1772     // If zn == zm, the difference is zero.
1773     if (!zd.Aliases(za)) {
1774       Mov(zd, za);
1775     }
1776   } else if (zd.Aliases(za)) {
1777     SingleEmissionCheckScope guard(this);
1778     (this->*fn)(zd, zn, zm);
1779   } else if (zd.Aliases(zn)) {
1780     UseScratchRegisterScope temps(this);
1781     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1782     Mov(ztmp, zn);
1783     MovprfxHelperScope guard(this, zd, za);
1784     (this->*fn)(zd, ztmp, zm);
1785   } else if (zd.Aliases(zm)) {
1786     UseScratchRegisterScope temps(this);
1787     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1788     Mov(ztmp, zm);
1789     MovprfxHelperScope guard(this, zd, za);
1790     (this->*fn)(zd, zn, ztmp);
1791   } else {
1792     MovprfxHelperScope guard(this, zd, za);
1793     (this->*fn)(zd, zn, zm);
1794   }
1795 }
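// Example of the shortcut above (an illustrative addition): in
//   Saba(z0.VnB(), z1.VnB(), z2.VnB(), z2.VnB());
// zn aliases zm, so the absolute difference is zero and the helper simply
// emits Mov(z0, z1) rather than an saba instruction.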
1796 
1797 #define VIXL_SVE_4REG_LIST(V)                       \
1798   V(Saba, saba, AbsoluteDifferenceAccumulate)       \
1799   V(Uaba, uaba, AbsoluteDifferenceAccumulate)       \
1800   V(Sabalb, sabalb, AbsoluteDifferenceAccumulate)   \
1801   V(Sabalt, sabalt, AbsoluteDifferenceAccumulate)   \
1802   V(Uabalb, uabalb, AbsoluteDifferenceAccumulate)   \
1803   V(Uabalt, uabalt, AbsoluteDifferenceAccumulate)   \
1804   V(Sdot, sdot, FourRegDestructiveHelper)           \
1805   V(Udot, udot, FourRegDestructiveHelper)           \
1806   V(Adclb, adclb, FourRegDestructiveHelper)         \
1807   V(Adclt, adclt, FourRegDestructiveHelper)         \
1808   V(Sbclb, sbclb, FourRegDestructiveHelper)         \
1809   V(Sbclt, sbclt, FourRegDestructiveHelper)         \
1810   V(Smlalb, smlalb, FourRegDestructiveHelper)       \
1811   V(Smlalt, smlalt, FourRegDestructiveHelper)       \
1812   V(Smlslb, smlslb, FourRegDestructiveHelper)       \
1813   V(Smlslt, smlslt, FourRegDestructiveHelper)       \
1814   V(Umlalb, umlalb, FourRegDestructiveHelper)       \
1815   V(Umlalt, umlalt, FourRegDestructiveHelper)       \
1816   V(Umlslb, umlslb, FourRegDestructiveHelper)       \
1817   V(Umlslt, umlslt, FourRegDestructiveHelper)       \
1818   V(Bcax, bcax, FourRegDestructiveHelper)           \
1819   V(Bsl, bsl, FourRegDestructiveHelper)             \
1820   V(Bsl1n, bsl1n, FourRegDestructiveHelper)         \
1821   V(Bsl2n, bsl2n, FourRegDestructiveHelper)         \
1822   V(Eor3, eor3, FourRegDestructiveHelper)           \
1823   V(Nbsl, nbsl, FourRegDestructiveHelper)           \
1824   V(Fmlalb, fmlalb, FourRegDestructiveHelper)       \
1825   V(Fmlalt, fmlalt, FourRegDestructiveHelper)       \
1826   V(Fmlslb, fmlslb, FourRegDestructiveHelper)       \
1827   V(Fmlslt, fmlslt, FourRegDestructiveHelper)       \
1828   V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper)   \
1829   V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
1830   V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper)   \
1831   V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper)   \
1832   V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
1833   V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper)   \
1834   V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper)   \
1835   V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper)   \
1836   V(Fmmla, fmmla, FourRegDestructiveHelper)         \
1837   V(Smmla, smmla, FourRegDestructiveHelper)         \
1838   V(Ummla, ummla, FourRegDestructiveHelper)         \
1839   V(Usmmla, usmmla, FourRegDestructiveHelper)       \
1840   V(Usdot, usdot, FourRegDestructiveHelper)
1841 
1842 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1843   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1844                               const ZRegister& za,   \
1845                               const ZRegister& zn,   \
1846                               const ZRegister& zm) { \
1847     VIXL_ASSERT(allow_macro_instructions_);          \
1848     HELPER(&Assembler::ASMFN, zd, za, zn, zm);       \
1849   }
1850 VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
1851 #undef VIXL_DEFINE_MASM_FUNC
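// For reference (a sketch of the expansion, not original text), the
// V(Saba, saba, AbsoluteDifferenceAccumulate) entry above generates:
//
//   void MacroAssembler::Saba(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     AbsoluteDifferenceAccumulate(&Assembler::saba, zd, za, zn, zm);
//   }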
1852 
1853 #define VIXL_SVE_4REG_1IMM_LIST(V)                      \
1854   V(Fmla, fmla, FourRegOneImmDestructiveHelper)         \
1855   V(Fmls, fmls, FourRegOneImmDestructiveHelper)         \
1856   V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper)     \
1857   V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper)     \
1858   V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper)     \
1859   V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper)     \
1860   V(Mla, mla, FourRegOneImmDestructiveHelper)           \
1861   V(Mls, mls, FourRegOneImmDestructiveHelper)           \
1862   V(Smlalb, smlalb, FourRegOneImmDestructiveHelper)     \
1863   V(Smlalt, smlalt, FourRegOneImmDestructiveHelper)     \
1864   V(Smlslb, smlslb, FourRegOneImmDestructiveHelper)     \
1865   V(Smlslt, smlslt, FourRegOneImmDestructiveHelper)     \
1866   V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
1867   V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
1868   V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
1869   V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
1870   V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
1871   V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
1872   V(Umlalb, umlalb, FourRegOneImmDestructiveHelper)     \
1873   V(Umlalt, umlalt, FourRegOneImmDestructiveHelper)     \
1874   V(Umlslb, umlslb, FourRegOneImmDestructiveHelper)     \
1875   V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)
1876 
1877 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1878   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1879                               const ZRegister& za,   \
1880                               const ZRegister& zn,   \
1881                               const ZRegister& zm,   \
1882                               int imm) {             \
1883     VIXL_ASSERT(allow_macro_instructions_);          \
1884     HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm);  \
1885   }
1886 VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
1887 #undef VIXL_DEFINE_MASM_FUNC
1888 
1889 void MacroAssembler::Sdot(const ZRegister& zd,
1890                           const ZRegister& za,
1891                           const ZRegister& zn,
1892                           const ZRegister& zm,
1893                           int index) {
1894   VIXL_ASSERT(allow_macro_instructions_);
1895   SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
1896 }
1897 
1898 void MacroAssembler::Udot(const ZRegister& zd,
1899                           const ZRegister& za,
1900                           const ZRegister& zn,
1901                           const ZRegister& zm,
1902                           int index) {
1903   VIXL_ASSERT(allow_macro_instructions_);
1904   SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
1905 }
1906 
1907 void MacroAssembler::Sudot(const ZRegister& zd,
1908                            const ZRegister& za,
1909                            const ZRegister& zn,
1910                            const ZRegister& zm,
1911                            int index) {
1912   VIXL_ASSERT(allow_macro_instructions_);
1913   SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
1914 }
1915 
1916 void MacroAssembler::Usdot(const ZRegister& zd,
1917                            const ZRegister& za,
1918                            const ZRegister& zn,
1919                            const ZRegister& zm,
1920                            int index) {
1921   VIXL_ASSERT(allow_macro_instructions_);
1922   SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
1923 }
1924 
1925 void MacroAssembler::Cdot(const ZRegister& zd,
1926                           const ZRegister& za,
1927                           const ZRegister& zn,
1928                           const ZRegister& zm,
1929                           int index,
1930                           int rot) {
1931   // This doesn't handle zm when it's outside the range that can be encoded in
1932   // the instruction. The range depends on the element size: z0-z7 for B, z0-z15 for H.
1933   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1934     UseScratchRegisterScope temps(this);
1935     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
1936     {
1937       MovprfxHelperScope guard(this, ztmp, za);
1938       cdot(ztmp, zn, zm, index, rot);
1939     }
1940     Mov(zd, ztmp);
1941   } else {
1942     MovprfxHelperScope guard(this, zd, za);
1943     cdot(zd, zn, zm, index, rot);
1944   }
1945 }
1946 
1947 void MacroAssembler::Cdot(const ZRegister& zd,
1948                           const ZRegister& za,
1949                           const ZRegister& zn,
1950                           const ZRegister& zm,
1951                           int rot) {
1952   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1953     UseScratchRegisterScope temps(this);
1954     VIXL_ASSERT(AreSameLaneSize(zn, zm));
1955     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
1956     Mov(ztmp, zd.Aliases(zn) ? zn : zm);
1957     MovprfxHelperScope guard(this, zd, za);
1958     cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
1959   } else {
1960     MovprfxHelperScope guard(this, zd, za);
1961     cdot(zd, zn, zm, rot);
1962   }
1963 }
1964 
1965 void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
1966                                     const PRegisterM& pg,
1967                                     const ZRegister& za,
1968                                     const ZRegister& zn,
1969                                     const ZRegister& zm,
1970                                     SVEMulAddPredicatedZdaFn fn_zda,
1971                                     SVEMulAddPredicatedZdnFn fn_zdn,
1972                                     FPMacroNaNPropagationOption nan_option) {
1973   ResolveFPNaNPropagationOption(&nan_option);
1974 
1975   if (zd.Aliases(za)) {
1976     // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
1977     SingleEmissionCheckScope guard(this);
1978     (this->*fn_zda)(zd, pg, zn, zm);
1979   } else if (zd.Aliases(zn)) {
1980     // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
1981     SingleEmissionCheckScope guard(this);
1982     (this->*fn_zdn)(zd, pg, zm, za);
1983   } else if (zd.Aliases(zm)) {
1984     switch (nan_option) {
1985       case FastNaNPropagation: {
1986         // We treat multiplication as commutative in the fast mode, so we can
1987         // swap zn and zm.
1988         // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
1989         SingleEmissionCheckScope guard(this);
1990         (this->*fn_zdn)(zd, pg, zn, za);
1991         return;
1992       }
1993       case StrictNaNPropagation: {
1994         UseScratchRegisterScope temps(this);
1995         // Use a scratch register to keep the argument order exactly as
1996         // specified.
1997         ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
1998         {
1999           MovprfxHelperScope guard(this, scratch, pg, za);
2000           // scratch = (-)za + ((-)zn * zm)
2001           (this->*fn_zda)(scratch, pg, zn, zm);
2002         }
2003         Mov(zd, scratch);
2004         return;
2005       }
2006       case NoFPMacroNaNPropagationSelected:
2007         VIXL_UNREACHABLE();
2008         return;
2009     }
2010   } else {
2011     // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
2012     MovprfxHelperScope guard(this, zd, pg, za);
2013     (this->*fn_zda)(zd, pg, zn, zm);
2014   }
2015 }
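// Sketch of how the helper above chooses an encoding (added, not original;
// operands are assumptions):
//   Fmla(z0.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS(),
//        FastNaNPropagation);  // zd aliases za: emits fmla directly.
//   Fmla(z1.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS(),
//        FastNaNPropagation);  // zd aliases zn: emits fmad instead.
// When zd aliases none of the sources, a movprfx from za precedes the fmla.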
2016 
2017 void MacroAssembler::Fmla(const ZRegister& zd,
2018                           const PRegisterM& pg,
2019                           const ZRegister& za,
2020                           const ZRegister& zn,
2021                           const ZRegister& zm,
2022                           FPMacroNaNPropagationOption nan_option) {
2023   VIXL_ASSERT(allow_macro_instructions_);
2024   FPMulAddHelper(zd,
2025                  pg,
2026                  za,
2027                  zn,
2028                  zm,
2029                  &Assembler::fmla,
2030                  &Assembler::fmad,
2031                  nan_option);
2032 }
2033 
2034 void MacroAssembler::Fmls(const ZRegister& zd,
2035                           const PRegisterM& pg,
2036                           const ZRegister& za,
2037                           const ZRegister& zn,
2038                           const ZRegister& zm,
2039                           FPMacroNaNPropagationOption nan_option) {
2040   VIXL_ASSERT(allow_macro_instructions_);
2041   FPMulAddHelper(zd,
2042                  pg,
2043                  za,
2044                  zn,
2045                  zm,
2046                  &Assembler::fmls,
2047                  &Assembler::fmsb,
2048                  nan_option);
2049 }
2050 
2051 void MacroAssembler::Fnmla(const ZRegister& zd,
2052                            const PRegisterM& pg,
2053                            const ZRegister& za,
2054                            const ZRegister& zn,
2055                            const ZRegister& zm,
2056                            FPMacroNaNPropagationOption nan_option) {
2057   VIXL_ASSERT(allow_macro_instructions_);
2058   FPMulAddHelper(zd,
2059                  pg,
2060                  za,
2061                  zn,
2062                  zm,
2063                  &Assembler::fnmla,
2064                  &Assembler::fnmad,
2065                  nan_option);
2066 }
2067 
2068 void MacroAssembler::Fnmls(const ZRegister& zd,
2069                            const PRegisterM& pg,
2070                            const ZRegister& za,
2071                            const ZRegister& zn,
2072                            const ZRegister& zm,
2073                            FPMacroNaNPropagationOption nan_option) {
2074   VIXL_ASSERT(allow_macro_instructions_);
2075   FPMulAddHelper(zd,
2076                  pg,
2077                  za,
2078                  zn,
2079                  zm,
2080                  &Assembler::fnmls,
2081                  &Assembler::fnmsb,
2082                  nan_option);
2083 }
2084 
2085 void MacroAssembler::Ftmad(const ZRegister& zd,
2086                            const ZRegister& zn,
2087                            const ZRegister& zm,
2088                            int imm3) {
2089   VIXL_ASSERT(allow_macro_instructions_);
2090   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2091     UseScratchRegisterScope temps(this);
2092     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
2093     Mov(scratch, zm);
2094     MovprfxHelperScope guard(this, zd, zn);
2095     ftmad(zd, zd, scratch, imm3);
2096   } else {
2097     MovprfxHelperScope guard(this, zd, zn);
2098     ftmad(zd, zd, zm, imm3);
2099   }
2100 }
2101 
2102 void MacroAssembler::Fcadd(const ZRegister& zd,
2103                            const PRegisterM& pg,
2104                            const ZRegister& zn,
2105                            const ZRegister& zm,
2106                            int rot) {
2107   VIXL_ASSERT(allow_macro_instructions_);
2108   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2109     UseScratchRegisterScope temps(this);
2110     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2111     {
2112       MovprfxHelperScope guard(this, scratch, pg, zn);
2113       fcadd(scratch, pg, scratch, zm, rot);
2114     }
2115     Mov(zd, scratch);
2116   } else {
2117     MovprfxHelperScope guard(this, zd, pg, zn);
2118     fcadd(zd, pg, zd, zm, rot);
2119   }
2120 }
2121 
2122 void MacroAssembler::Fcmla(const ZRegister& zd,
2123                            const PRegisterM& pg,
2124                            const ZRegister& za,
2125                            const ZRegister& zn,
2126                            const ZRegister& zm,
2127                            int rot) {
2128   VIXL_ASSERT(allow_macro_instructions_);
2129   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
2130     UseScratchRegisterScope temps(this);
2131     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
2132     {
2133       MovprfxHelperScope guard(this, ztmp, za);
2134       fcmla(ztmp, pg, zn, zm, rot);
2135     }
2136     Mov(zd, pg, ztmp);
2137   } else {
2138     MovprfxHelperScope guard(this, zd, pg, za);
2139     fcmla(zd, pg, zn, zm, rot);
2140   }
2141 }
2142 
2143 void MacroAssembler::Splice(const ZRegister& zd,
2144                             const PRegister& pg,
2145                             const ZRegister& zn,
2146                             const ZRegister& zm) {
2147   VIXL_ASSERT(allow_macro_instructions_);
2148   if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
2149     SingleEmissionCheckScope guard(this);
2150     splice(zd, pg, zn, zm);
2151   } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2152     UseScratchRegisterScope temps(this);
2153     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2154     {
2155       MovprfxHelperScope guard(this, scratch, zn);
2156       splice(scratch, pg, scratch, zm);
2157     }
2158     Mov(zd, scratch);
2159   } else {
2160     MovprfxHelperScope guard(this, zd, zn);
2161     splice(zd, pg, zd, zm);
2162   }
2163 }
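// Note (illustrative, not original): with SVE2 and consecutively-numbered
// sources, e.g.
//   Splice(z0.VnB(), p0, z1.VnB(), z2.VnB());
// the constructive SPLICE form is used; otherwise the destructive form is
// emitted under movprfx, or via a scratch register when zd aliases zm.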
2164 
2165 void MacroAssembler::Clasta(const ZRegister& zd,
2166                             const PRegister& pg,
2167                             const ZRegister& zn,
2168                             const ZRegister& zm) {
2169   VIXL_ASSERT(allow_macro_instructions_);
2170   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2171     UseScratchRegisterScope temps(this);
2172     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2173     {
2174       MovprfxHelperScope guard(this, scratch, zn);
2175       clasta(scratch, pg, scratch, zm);
2176     }
2177     Mov(zd, scratch);
2178   } else {
2179     MovprfxHelperScope guard(this, zd, zn);
2180     clasta(zd, pg, zd, zm);
2181   }
2182 }
2183 
2184 void MacroAssembler::Clastb(const ZRegister& zd,
2185                             const PRegister& pg,
2186                             const ZRegister& zn,
2187                             const ZRegister& zm) {
2188   VIXL_ASSERT(allow_macro_instructions_);
2189   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2190     UseScratchRegisterScope temps(this);
2191     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2192     {
2193       MovprfxHelperScope guard(this, scratch, zn);
2194       clastb(scratch, pg, scratch, zm);
2195     }
2196     Mov(zd, scratch);
2197   } else {
2198     MovprfxHelperScope guard(this, zd, zn);
2199     clastb(zd, pg, zd, zm);
2200   }
2201 }
2202 
2203 void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
2204                                           const ZRegister& zd,
2205                                           const ZRegister& za,
2206                                           const ZRegister& zn,
2207                                           int shift) {
2208   VIXL_ASSERT(allow_macro_instructions_);
2209   if (!zd.Aliases(za) && zd.Aliases(zn)) {
2210     UseScratchRegisterScope temps(this);
2211     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
2212     Mov(ztmp, zn);
2213     {
2214       MovprfxHelperScope guard(this, zd, za);
2215       (this->*fn)(zd, ztmp, shift);
2216     }
2217   } else {
2218     MovprfxHelperScope guard(this, zd, za);
2219     (this->*fn)(zd, zn, shift);
2220   }
2221 }
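// Example of the accumulate-shift helper above (a sketch, not original):
//   Usra(z0.VnD(), z1.VnD(), z2.VnD(), 3);
// zd aliases neither source, so this becomes a movprfx from z1 followed by
// usra accumulating z2 shifted right by three.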
2222 
2223 void MacroAssembler::Srsra(const ZRegister& zd,
2224                            const ZRegister& za,
2225                            const ZRegister& zn,
2226                            int shift) {
2227   ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
2228 }
2229 
2230 void MacroAssembler::Ssra(const ZRegister& zd,
2231                           const ZRegister& za,
2232                           const ZRegister& zn,
2233                           int shift) {
2234   ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
2235 }
2236 
2237 void MacroAssembler::Ursra(const ZRegister& zd,
2238                            const ZRegister& za,
2239                            const ZRegister& zn,
2240                            int shift) {
2241   ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
2242 }
2243 
2244 void MacroAssembler::Usra(const ZRegister& zd,
2245                           const ZRegister& za,
2246                           const ZRegister& zn,
2247                           int shift) {
2248   ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
2249 }
2250 
2251 void MacroAssembler::ComplexAddition(ZZZImmFn fn,
2252                                      const ZRegister& zd,
2253                                      const ZRegister& zn,
2254                                      const ZRegister& zm,
2255                                      int rot) {
2256   VIXL_ASSERT(allow_macro_instructions_);
2257   if (!zd.Aliases(zn) && zd.Aliases(zm)) {
2258     UseScratchRegisterScope temps(this);
2259     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
2260     Mov(ztmp, zm);
2261     {
2262       MovprfxHelperScope guard(this, zd, zn);
2263       (this->*fn)(zd, zd, ztmp, rot);
2264     }
2265   } else {
2266     MovprfxHelperScope guard(this, zd, zn);
2267     (this->*fn)(zd, zd, zm, rot);
2268   }
2269 }
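// Example (illustrative addition): rot must be 90 or 270 for these complex
// additions. In
//   Cadd(z0.VnH(), z0.VnH(), z1.VnH(), 90);
// zd aliases zn, so the else branch applies and cadd is emitted with zd as
// the destructive operand.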
2270 
2271 void MacroAssembler::Cadd(const ZRegister& zd,
2272                           const ZRegister& zn,
2273                           const ZRegister& zm,
2274                           int rot) {
2275   ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
2276 }
2277 
2278 void MacroAssembler::Sqcadd(const ZRegister& zd,
2279                             const ZRegister& zn,
2280                             const ZRegister& zm,
2281                             int rot) {
2282   ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
2283 }
2284 
2285 }  // namespace aarch64
2286 }  // namespace vixl
2287