1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "macro-assembler-aarch64.h"
28 
29 namespace vixl {
30 namespace aarch64 {
31 
32 void MacroAssembler::AddSubHelper(AddSubHelperOption option,
33                                   const ZRegister& zd,
34                                   const ZRegister& zn,
35                                   IntegerOperand imm) {
36   VIXL_ASSERT(imm.FitsInLane(zd));
37 
38   // Simple, encodable cases.
39   if (TrySingleAddSub(option, zd, zn, imm)) return;
40 
41   VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
42   bool add_imm = (option == kAddImmediate);
43 
44   // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
45   // instruction. Also interpret the immediate as signed, so we can convert
46   // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
47   IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
48   if (signed_imm.IsNegative()) {
49     AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
50     IntegerOperand n_imm(signed_imm.GetMagnitude());
51     // IntegerOperand can represent -INT_MIN, so this is always safe.
52     VIXL_ASSERT(n_imm.IsPositiveOrZero());
53     if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
54   }
55 
56   // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
57   UseScratchRegisterScope temps(this);
58   ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
59   Dup(scratch, imm);
60 
61   SingleEmissionCheckScope guard(this);
62   if (add_imm) {
63     add(zd, zn, scratch);
64   } else {
65     sub(zd, zn, scratch);
66   }
67 }
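// Illustrative sketch of how the helper above expands a few representative
// immediates (assumes a MacroAssembler `masm`; the exact sequences depend on
// register aliasing and scratch allocation):
//
//   masm.Add(z0.VnH(), z1.VnH(), 0xffff);  // -1 when read as a signed H lane:
//                                          //   movprfx z0, z1
//                                          //   sub z0.h, z0.h, #1
//   masm.Add(z0.VnH(), z0.VnH(), 0x1234);  // Not encodable as imm8 (LSL 0/8),
//                                          // so #0x1234 is broadcast into a
//                                          // scratch Z register and an
//                                          // unpredicated vector add is used.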
68 
69 bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
70                                      const ZRegister& zd,
71                                      const ZRegister& zn,
72                                      IntegerOperand imm) {
73   VIXL_ASSERT(imm.FitsInLane(zd));
74 
75   int imm8;
76   int shift = -1;
77   if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
78       imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
79     MovprfxHelperScope guard(this, zd, zn);
80     switch (option) {
81       case kAddImmediate:
82         add(zd, zd, imm8, shift);
83         return true;
84       case kSubImmediate:
85         sub(zd, zd, imm8, shift);
86         return true;
87     }
88   }
89   return false;
90 }
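// Illustrative sketch of the encodable range handled by TrySingleAddSub
// (assumes a MacroAssembler `masm`): the SVE ADD/SUB (immediate) encodings
// take an unsigned 8-bit value, optionally shifted left by 8.
//
//   masm.Add(z2.VnS(), z3.VnS(), 200);      // movprfx z2, z3
//                                           // add z2.s, z2.s, #200
//   masm.Add(z2.VnS(), z3.VnS(), 63 << 8);  // movprfx z2, z3
//                                           // add z2.s, z2.s, #63, lsl #8
//   masm.Add(z2.VnS(), z3.VnS(), 257);      // Not encodable; falls back to the
//                                           // dup + vector add path above.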
91 
92 void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
93                                       SVEArithPredicatedFn reg_macro,
94                                       const ZRegister& zd,
95                                       const ZRegister& zn,
96                                       IntegerOperand imm,
97                                       bool is_signed) {
98   if (is_signed) {
99     // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
100     if (imm.IsInt8()) {
101       MovprfxHelperScope guard(this, zd, zn);
102       (this->*imm_fn)(zd, zd, imm.AsInt8());
103       return;
104     }
105   } else {
106     // E.g. UMIN_z_zi, UMAX_z_zi
107     if (imm.IsUint8()) {
108       MovprfxHelperScope guard(this, zd, zn);
109       (this->*imm_fn)(zd, zd, imm.AsUint8());
110       return;
111     }
112   }
113 
114   UseScratchRegisterScope temps(this);
115   PRegister pg = temps.AcquireGoverningP();
116   Ptrue(pg.WithSameLaneSizeAs(zd));
117 
118   // Try to re-use zd if we can, so we can avoid a movprfx.
119   ZRegister scratch =
120       zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
121                      : zd;
122   Dup(scratch, imm);
123 
124   // The vector-form macro for commutative operations will swap the arguments to
125   // avoid movprfx, if necessary.
126   (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
127 }
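// Illustrative sketch of the two paths above (assumes a MacroAssembler `masm`;
// expansions are approximate):
//
//   masm.Mul(z0.VnS(), z1.VnS(), 3);     // Fits in a signed 8-bit immediate:
//                                        //   movprfx z0, z1
//                                        //   mul z0.s, z0.s, #3
//   masm.Umin(z0.VnS(), z1.VnS(), 300);  // Does not fit in an unsigned 8-bit
//                                        // immediate, so #300 is broadcast
//                                        // into a Z register (z0 is re-used
//                                        // here), a governing Ptrue is set up,
//                                        // and the predicated vector form of
//                                        // Umin is used.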
128 
129 void MacroAssembler::Mul(const ZRegister& zd,
130                          const ZRegister& zn,
131                          IntegerOperand imm) {
132   VIXL_ASSERT(allow_macro_instructions_);
133   IntArithImmFn imm_fn = &Assembler::mul;
134   SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
135   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
136 }
137 
138 void MacroAssembler::Smin(const ZRegister& zd,
139                           const ZRegister& zn,
140                           IntegerOperand imm) {
141   VIXL_ASSERT(allow_macro_instructions_);
142   VIXL_ASSERT(imm.FitsInSignedLane(zd));
143   IntArithImmFn imm_fn = &Assembler::smin;
144   SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
145   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
146 }
147 
148 void MacroAssembler::Smax(const ZRegister& zd,
149                           const ZRegister& zn,
150                           IntegerOperand imm) {
151   VIXL_ASSERT(allow_macro_instructions_);
152   VIXL_ASSERT(imm.FitsInSignedLane(zd));
153   IntArithImmFn imm_fn = &Assembler::smax;
154   SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
155   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
156 }
157 
158 void MacroAssembler::Umax(const ZRegister& zd,
159                           const ZRegister& zn,
160                           IntegerOperand imm) {
161   VIXL_ASSERT(allow_macro_instructions_);
162   VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
163   IntArithImmFn imm_fn = &Assembler::umax;
164   SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
165   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
166 }
167 
168 void MacroAssembler::Umin(const ZRegister& zd,
169                           const ZRegister& zn,
170                           IntegerOperand imm) {
171   VIXL_ASSERT(allow_macro_instructions_);
172   VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
173   IntArithImmFn imm_fn = &Assembler::umin;
174   SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
175   IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
176 }
177 
178 void MacroAssembler::Addpl(const Register& xd,
179                            const Register& xn,
180                            int64_t multiplier) {
181   VIXL_ASSERT(allow_macro_instructions_);
182 
183   // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
184   // `VL * multiplier` cannot overflow, for any possible value of VL.
185   VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
186   VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
187 
188   if (xd.IsZero()) return;
189   if (xn.IsZero() && xd.IsSP()) {
190     // TODO: This operation doesn't make much sense, but we could support it
191     // with a scratch register if necessary.
192     VIXL_UNIMPLEMENTED();
193   }
194 
195   // Handling xzr requires an extra move, so defer it until later; that way we
196   // can first try to use `rdvl` instead (via `Addvl`).
197   if (IsInt6(multiplier) && !xn.IsZero()) {
198     SingleEmissionCheckScope guard(this);
199     addpl(xd, xn, static_cast<int>(multiplier));
200     return;
201   }
202 
203   // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
204   if ((multiplier % kZRegBitsPerPRegBit) == 0) {
205     Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
206     return;
207   }
208 
209   if (IsInt6(multiplier)) {
210     VIXL_ASSERT(xn.IsZero());  // Other cases were handled with `addpl`.
211     // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
212     // materialise a zero.
213     MacroEmissionCheckScope guard(this);
214     movz(xd, 0);
215     addpl(xd, xd, static_cast<int>(multiplier));
216     return;
217   }
218 
219   // TODO: Some probable cases result in rather long sequences. For example,
220   // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
221   // outside the encodable range. We should look for ways to cover such cases
222   // without drastically increasing the complexity of this logic.
223 
224   // For other cases, calculate xn + (PL * multiplier) using discrete
225   // instructions. This requires two scratch registers in the general case, so
226   // try to re-use the destination as a scratch register.
227   UseScratchRegisterScope temps(this);
228   temps.Include(xd);
229   temps.Exclude(xn);
230 
231   Register scratch = temps.AcquireX();
232   // There is no `rdpl`, so we have to calculate PL from VL. We can't
233   // scale the multiplier because (we already know) it isn't a multiple of 8.
234   Rdvl(scratch, multiplier);
235 
236   MacroEmissionCheckScope guard(this);
237   if (xn.IsZero()) {
238     asr(xd, scratch, kZRegBitsPerPRegBitLog2);
239   } else if (xd.IsSP() || xn.IsSP()) {
240     // TODO: MacroAssembler::Add should be able to handle this.
241     asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
242     add(xd, xn, scratch);
243   } else {
244     add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
245   }
246 }
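// Illustrative examples (assume a MacroAssembler `masm`); PL is VL / 8:
//
//   masm.Addpl(x0, x1, 3);    // addpl x0, x1, #3
//   masm.Addpl(x0, x1, 16);   // Multiple of 8, so: addvl x0, x1, #2
//   masm.Addpl(x0, xzr, 3);   // There is no rdpl:
//                             //   movz x0, #0
//                             //   addpl x0, x0, #3
//   masm.Addpl(sp, sp, 33);   // Out of range; falls back to Rdvl plus an
//                             // arithmetic shift and add (see the TODO above).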
247 
248 void MacroAssembler::Addvl(const Register& xd,
249                            const Register& xn,
250                            int64_t multiplier) {
251   VIXL_ASSERT(allow_macro_instructions_);
252   VIXL_ASSERT(xd.IsX());
253   VIXL_ASSERT(xn.IsX());
254 
255   // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
256   VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
257   VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
258 
259   if (xd.IsZero()) return;
260   if (xn.IsZero() && xd.IsSP()) {
261     // TODO: This operation doesn't make much sense, but we could support it
262     // with a scratch register if necessary. `rdvl` cannot write into `sp`.
263     VIXL_UNIMPLEMENTED();
264   }
265 
266   if (IsInt6(multiplier)) {
267     SingleEmissionCheckScope guard(this);
268     if (xn.IsZero()) {
269       rdvl(xd, static_cast<int>(multiplier));
270     } else {
271       addvl(xd, xn, static_cast<int>(multiplier));
272     }
273     return;
274   }
275 
276   // TODO: Some probable cases result in rather long sequences. For example,
277   // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
278   // outside the encodable range. We should look for ways to cover such cases
279   // without drastically increasing the complexity of this logic.
280 
281   // For other cases, calculate xn + (VL * multiplier) using discrete
282   // instructions. This requires two scratch registers in the general case, so
283   // we try to re-use the destination as a scratch register.
284   UseScratchRegisterScope temps(this);
285   temps.Include(xd);
286   temps.Exclude(xn);
287 
288   Register a = temps.AcquireX();
289   Mov(a, multiplier);
290 
291   MacroEmissionCheckScope guard(this);
292   Register b = temps.AcquireX();
293   rdvl(b, 1);
294   if (xn.IsZero()) {
295     mul(xd, a, b);
296   } else if (xd.IsSP() || xn.IsSP()) {
297     mul(a, a, b);
298     add(xd, xn, a);
299   } else {
300     madd(xd, a, b, xn);
301   }
302 }
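// Illustrative examples (assume a MacroAssembler `masm`):
//
//   masm.Addvl(x0, x1, 4);     // addvl x0, x1, #4
//   masm.Addvl(x0, xzr, 4);    // rdvl x0, #4
//   masm.Addvl(x0, x1, 100);   // Outside the signed 6-bit range:
//                              //   mov <scratch1>, #100
//                              //   rdvl <scratch2>, #1
//                              //   madd x0, <scratch1>, <scratch2>, x1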
303 
304 void MacroAssembler::CalculateSVEAddress(const Register& xd,
305                                          const SVEMemOperand& addr,
306                                          int vl_divisor_log2) {
307   VIXL_ASSERT(allow_macro_instructions_);
308   VIXL_ASSERT(!addr.IsScatterGather());
309   VIXL_ASSERT(xd.IsX());
310 
311   // The lower bound is where a whole Z register is accessed.
312   VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
313   // The upper bound is for P register accesses, and for instructions like
314   // "st1b { z0.d }, [...]", where one byte is accessed for every D-sized lane.
315   VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));
316 
317   SVEOffsetModifier mod = addr.GetOffsetModifier();
318   Register base = addr.GetScalarBase();
319 
320   if (addr.IsEquivalentToScalar()) {
321     // For example:
322     //   [x0]
323     //   [x0, #0]
324     //   [x0, xzr, LSL 2]
325     Mov(xd, base);
326   } else if (addr.IsScalarPlusImmediate()) {
327     // For example:
328     //   [x0, #42]
329     //   [x0, #42, MUL VL]
330     int64_t offset = addr.GetImmediateOffset();
331     VIXL_ASSERT(offset != 0);  // Handled by IsEquivalentToScalar.
332     if (addr.IsMulVl()) {
333       int vl_divisor = 1 << vl_divisor_log2;
334       // For all possible values of vl_divisor, we can simply use `Addpl`. This
335       // will select `addvl` if necessary.
336       VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
337       Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
338     } else {
339       // IsScalarPlusImmediate() ensures that no other modifiers can occur.
340       VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
341       Add(xd, base, offset);
342     }
343   } else if (addr.IsScalarPlusScalar()) {
344     // For example:
345     //   [x0, x1]
346     //   [x0, x1, LSL #4]
347     Register offset = addr.GetScalarOffset();
348     VIXL_ASSERT(!offset.IsZero());  // Handled by IsEquivalentToScalar.
349     if (mod == SVE_LSL) {
350       Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
351     } else {
352       // IsScalarPlusScalar() ensures that no other modifiers can occur.
353       VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
354       Add(xd, base, offset);
355     }
356   } else {
357     // All other forms are scatter-gather addresses, which cannot be evaluated
358     // into an X register.
359     VIXL_UNREACHABLE();
360   }
361 }
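// Illustrative expansions for vl_divisor_log2 == 0, i.e. a whole-Z-register
// access (address forms shown in assembler syntax; sequences are approximate):
//
//   [x1]               ->  mov xd, x1
//   [x1, #3, MUL VL]   ->  3 * VL == 24 * PL, so: addpl xd, x1, #24
//   [x1, x2, LSL #4]   ->  add xd, x1, x2, lsl #4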
362 
363 void MacroAssembler::Cpy(const ZRegister& zd,
364                          const PRegister& pg,
365                          IntegerOperand imm) {
366   VIXL_ASSERT(allow_macro_instructions_);
367   VIXL_ASSERT(imm.FitsInLane(zd));
368   int imm8;
369   int shift;
370   if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
371       imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
372     SingleEmissionCheckScope guard(this);
373     cpy(zd, pg, imm8, shift);
374     return;
375   }
376 
377   // The fallbacks rely on `cpy` variants that only support merging predication.
378   // If zeroing predication was requested, zero the destination first.
379   if (pg.IsZeroing()) {
380     SingleEmissionCheckScope guard(this);
381     dup(zd, 0);
382   }
383   PRegisterM pg_m = pg.Merging();
384 
385   // Try to encode the immediate using fcpy.
386   VIXL_ASSERT(imm.FitsInLane(zd));
387   if (zd.GetLaneSizeInBits() >= kHRegSize) {
388     double fp_imm = 0.0;
389     switch (zd.GetLaneSizeInBits()) {
390       case kHRegSize:
391         fp_imm =
392             FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
393         break;
394       case kSRegSize:
395         fp_imm = RawbitsToFloat(imm.AsUint32());
396         break;
397       case kDRegSize:
398         fp_imm = RawbitsToDouble(imm.AsUint64());
399         break;
400       default:
401         VIXL_UNREACHABLE();
402         break;
403     }
404     // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
405     // we can use IsImmFP64 for all lane sizes.
406     if (IsImmFP64(fp_imm)) {
407       SingleEmissionCheckScope guard(this);
408       fcpy(zd, pg_m, fp_imm);
409       return;
410     }
411   }
412 
413   // Fall back to using a scratch register.
414   UseScratchRegisterScope temps(this);
415   Register scratch = temps.AcquireRegisterToHoldLane(zd);
416   Mov(scratch, imm);
417 
418   SingleEmissionCheckScope guard(this);
419   cpy(zd, pg_m, scratch);
420 }
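// Illustrative examples (assume a MacroAssembler `masm`):
//
//   masm.Cpy(z0.VnH(), p0.Merging(), 42 << 8);
//       // Encodable as a shifted 8-bit immediate:
//       //   cpy z0.h, p0/m, #42, lsl #8
//   masm.Cpy(z0.VnD(), p0.Zeroing(), 0x3ff0000000000000);  // Raw bits of 1.0.
//       // The fallbacks only merge, so the destination is zeroed first, then
//       // the FP8-encodable value 1.0 is copied with fcpy:
//       //   dup z0.d, #0
//       //   fcpy z0.d, p0/m, #1.0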
421 
422 // TODO: We implement Fcpy (amongst other things) for all FP types because it
423 // allows us to preserve user-specified NaNs. We should come up with some
424 // FPImmediate type to abstract this, and avoid all the duplication below (and
425 // elsewhere).
426 
427 void MacroAssembler::Fcpy(const ZRegister& zd,
428                           const PRegisterM& pg,
429                           double imm) {
430   VIXL_ASSERT(allow_macro_instructions_);
431   VIXL_ASSERT(pg.IsMerging());
432 
433   if (IsImmFP64(imm)) {
434     SingleEmissionCheckScope guard(this);
435     fcpy(zd, pg, imm);
436     return;
437   }
438 
439   // As a fall-back, cast the immediate to the required lane size, and try to
440   // encode the bit pattern using `Cpy`.
441   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
442 }
443 
444 void MacroAssembler::Fcpy(const ZRegister& zd,
445                           const PRegisterM& pg,
446                           float imm) {
447   VIXL_ASSERT(allow_macro_instructions_);
448   VIXL_ASSERT(pg.IsMerging());
449 
450   if (IsImmFP32(imm)) {
451     SingleEmissionCheckScope guard(this);
452     fcpy(zd, pg, imm);
453     return;
454   }
455 
456   // As a fall-back, cast the immediate to the required lane size, and try to
457   // encode the bit pattern using `Cpy`.
458   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
459 }
460 
461 void MacroAssembler::Fcpy(const ZRegister& zd,
462                           const PRegisterM& pg,
463                           Float16 imm) {
464   VIXL_ASSERT(allow_macro_instructions_);
465   VIXL_ASSERT(pg.IsMerging());
466 
467   if (IsImmFP16(imm)) {
468     SingleEmissionCheckScope guard(this);
469     fcpy(zd, pg, imm);
470     return;
471   }
472 
473   // As a fall-back, cast the immediate to the required lane size, and try to
474   // encode the bit pattern using `Cpy`.
475   Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
476 }
477 
478 void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
479   VIXL_ASSERT(allow_macro_instructions_);
480   VIXL_ASSERT(imm.FitsInLane(zd));
481   unsigned lane_size = zd.GetLaneSizeInBits();
482   int imm8;
483   int shift;
484   if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
485       imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
486     SingleEmissionCheckScope guard(this);
487     dup(zd, imm8, shift);
488   } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
489     SingleEmissionCheckScope guard(this);
490     dupm(zd, imm.AsUintN(lane_size));
491   } else {
492     UseScratchRegisterScope temps(this);
493     Register scratch = temps.AcquireRegisterToHoldLane(zd);
494     Mov(scratch, imm);
495 
496     SingleEmissionCheckScope guard(this);
497     dup(zd, scratch);
498   }
499 }
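// Illustrative examples of the three paths above (assume a MacroAssembler
// `masm`):
//
//   masm.Dup(z0.VnS(), 42);          // dup z0.s, #42
//   masm.Dup(z0.VnS(), 0xff00);      // A valid bitmask immediate:
//                                    //   dupm z0.s, #0xff00
//   masm.Dup(z0.VnS(), 0x12345678);  // Neither form encodes, so the value is
//                                    // built in a scratch W register (possibly
//                                    // taking several instructions) and then
//                                    // broadcast: dup z0.s, <w scratch>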
500 
501 void MacroAssembler::NoncommutativeArithmeticHelper(
502     const ZRegister& zd,
503     const PRegisterM& pg,
504     const ZRegister& zn,
505     const ZRegister& zm,
506     SVEArithPredicatedFn fn,
507     SVEArithPredicatedFn rev_fn) {
508   if (zd.Aliases(zn)) {
509     // E.g. zd = zd / zm
510     SingleEmissionCheckScope guard(this);
511     (this->*fn)(zd, pg, zn, zm);
512   } else if (zd.Aliases(zm)) {
513     // E.g. zd = zn / zd
514     SingleEmissionCheckScope guard(this);
515     (this->*rev_fn)(zd, pg, zm, zn);
516   } else {
517     // E.g. zd = zn / zm
518     MovprfxHelperScope guard(this, zd, pg, zn);
519     (this->*fn)(zd, pg, zd, zm);
520   }
521 }
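// Illustrative sketch: the reversed form lets the helper keep a destructive
// encoding when the destination aliases the second operand (assumes a
// MacroAssembler `masm`):
//
//   masm.Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS());
//       // zd aliases zm, so the reversed form is used:
//       //   fsubr z0.s, p0/m, z0.s, z1.s
//   masm.Fsub(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());
//       // No useful alias:
//       //   movprfx z0.s, p0/m, z1.s
//       //   fsub z0.s, p0/m, z0.s, z2.s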
522 
523 void MacroAssembler::FPCommutativeArithmeticHelper(
524     const ZRegister& zd,
525     const PRegisterM& pg,
526     const ZRegister& zn,
527     const ZRegister& zm,
528     SVEArithPredicatedFn fn,
529     FPMacroNaNPropagationOption nan_option) {
530   ResolveFPNaNPropagationOption(&nan_option);
531 
532   if (zd.Aliases(zn)) {
533     SingleEmissionCheckScope guard(this);
534     (this->*fn)(zd, pg, zd, zm);
535   } else if (zd.Aliases(zm)) {
536     switch (nan_option) {
537       case FastNaNPropagation: {
538         // Swap the arguments.
539         SingleEmissionCheckScope guard(this);
540         (this->*fn)(zd, pg, zd, zn);
541         return;
542       }
543       case StrictNaNPropagation: {
544         UseScratchRegisterScope temps(this);
545         // Use a scratch register to keep the argument order exactly as
546         // specified.
547         ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
548         {
549           MovprfxHelperScope guard(this, scratch, pg, zn);
550           (this->*fn)(scratch, pg, scratch, zm);
551         }
552         Mov(zd, scratch);
553         return;
554       }
555       case NoFPMacroNaNPropagationSelected:
556         VIXL_UNREACHABLE();
557         return;
558     }
559   } else {
560     MovprfxHelperScope guard(this, zd, pg, zn);
561     (this->*fn)(zd, pg, zd, zm);
562   }
563 }
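// Illustrative sketch: the NaN propagation option only matters when the macro
// would otherwise have to swap the operands (assumes a MacroAssembler `masm`):
//
//   masm.Fadd(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), FastNaNPropagation);
//       // The operands may be swapped freely:
//       //   fadd z0.s, p0/m, z0.s, z1.s
//   masm.Fadd(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), StrictNaNPropagation);
//       // The operand order is preserved via a scratch register:
//       //   movprfx <ztmp>.s, p0/m, z1.s
//       //   fadd <ztmp>.s, p0/m, <ztmp>.s, z0.s
//       //   mov z0.d, <ztmp>.d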
564 
565 // Instructions of the form "inst zda, zn, zm, #num", which are non-commutative
566 // and for which no reversed form is provided.
567 #define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
568   V(Cmla, cmla)                              \
569   V(Sqrdcmlah, sqrdcmlah)
570 
571 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
572   void MacroAssembler::MASMFN(const ZRegister& zd,               \
573                               const ZRegister& za,               \
574                               const ZRegister& zn,               \
575                               const ZRegister& zm,               \
576                               int imm) {                         \
577     if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
578       UseScratchRegisterScope temps(this);                       \
579       VIXL_ASSERT(AreSameLaneSize(zn, zm));                      \
580       ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);  \
581       Mov(ztmp, zd.Aliases(zn) ? zn : zm);                       \
582       MovprfxHelperScope guard(this, zd, za);                    \
583       ASMFN(zd,                                                  \
584             (zd.Aliases(zn) ? ztmp : zn),                        \
585             (zd.Aliases(zm) ? ztmp : zm),                        \
586             imm);                                                \
587     } else {                                                     \
588       MovprfxHelperScope guard(this, zd, za);                    \
589       ASMFN(zd, zn, zm, imm);                                    \
590     }                                                            \
591   }
592 VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
593 #undef VIXL_DEFINE_MASM_FUNC
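// Illustrative sketch for the macros generated above (assumes a MacroAssembler
// `masm`): an input that would be clobbered by the movprfx is first copied to
// a scratch register.
//
//   masm.Cmla(z0.VnS(), z1.VnS(), z0.VnS(), z2.VnS(), 90);
//       // zd aliases zn but not za:
//       //   mov <ztmp>.d, z0.d
//       //   movprfx z0, z1
//       //   cmla z0.s, <ztmp>.s, z2.s, #90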
594 
595 // Instructions of the form "inst zda, zn, zm, #num, #num", which are
596 // non-commutative and for which no reversed form is provided.
597 #define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
598   V(Cmla, cmla)                               \
599   V(Sqrdcmlah, sqrdcmlah)
600 
601 // This doesn't handle zm when it is outside the range encodable in the
602 // instruction, which depends on the element size: z0-z7 for H, z0-z15 for S.
603 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                     \
604   void MacroAssembler::MASMFN(const ZRegister& zd,               \
605                               const ZRegister& za,               \
606                               const ZRegister& zn,               \
607                               const ZRegister& zm,               \
608                               int index,                         \
609                               int rot) {                         \
610     if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
611       UseScratchRegisterScope temps(this);                       \
612       ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);  \
613       {                                                          \
614         MovprfxHelperScope guard(this, ztmp, za);                \
615         ASMFN(ztmp, zn, zm, index, rot);                         \
616       }                                                          \
617       Mov(zd, ztmp);                                             \
618     } else {                                                     \
619       MovprfxHelperScope guard(this, zd, za);                    \
620       ASMFN(zd, zn, zm, index, rot);                             \
621     }                                                            \
622   }
623 VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
624 #undef VIXL_DEFINE_MASM_FUNC
625 
626 // Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
627 // and for which no reversed form is provided.
628 #define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
629   V(Addp, addp)                             \
630   V(Bic, bic)                               \
631   V(Faddp, faddp)                           \
632   V(Fmaxnmp, fmaxnmp)                       \
633   V(Fminnmp, fminnmp)                       \
634   V(Fmaxp, fmaxp)                           \
635   V(Fminp, fminp)                           \
636   V(Fscale, fscale)                         \
637   V(Smaxp, smaxp)                           \
638   V(Sminp, sminp)                           \
639   V(Suqadd, suqadd)                         \
640   V(Umaxp, umaxp)                           \
641   V(Uminp, uminp)                           \
642   V(Usqadd, usqadd)
643 
644 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                       \
645   void MacroAssembler::MASMFN(const ZRegister& zd,                 \
646                               const PRegisterM& pg,                \
647                               const ZRegister& zn,                 \
648                               const ZRegister& zm) {               \
649     VIXL_ASSERT(allow_macro_instructions_);                        \
650     if (zd.Aliases(zm) && !zd.Aliases(zn)) {                       \
651       UseScratchRegisterScope temps(this);                         \
652       ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
653       Mov(scratch, zm);                                            \
654       MovprfxHelperScope guard(this, zd, pg, zn);                  \
655       ASMFN(zd, pg, zd, scratch);                                  \
656     } else {                                                       \
657       MovprfxHelperScope guard(this, zd, pg, zn);                  \
658       ASMFN(zd, pg, zd, zm);                                       \
659     }                                                              \
660   }
661 VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
662 #undef VIXL_DEFINE_MASM_FUNC
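// Illustrative sketch for the macros generated above (assumes a MacroAssembler
// `masm`): a scratch register is only needed when the destination aliases zm,
// the operand that the destructive encoding would overwrite.
//
//   masm.Bic(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS());
//       // mov <ztmp>.d, z0.d
//       // movprfx z0.s, p0/m, z1.s
//       // bic z0.s, p0/m, z0.s, <ztmp>.s
//   masm.Bic(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());
//       // movprfx z0.s, p0/m, z1.s
//       // bic z0.s, p0/m, z0.s, z2.s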
663 
664 // Instructions of the form "inst zda, pg, zda, zn", which are non-commutative
665 // and for which a reversed form is provided.
666 #define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
667   V(Asr, asr)                                       \
668   V(Fdiv, fdiv)                                     \
669   V(Fsub, fsub)                                     \
670   V(Lsl, lsl)                                       \
671   V(Lsr, lsr)                                       \
672   V(Sdiv, sdiv)                                     \
673   V(Shsub, shsub)                                   \
674   V(Sqrshl, sqrshl)                                 \
675   V(Sqshl, sqshl)                                   \
676   V(Sqsub, sqsub)                                   \
677   V(Srshl, srshl)                                   \
678   V(Sub, sub)                                       \
679   V(Udiv, udiv)                                     \
680   V(Uhsub, uhsub)                                   \
681   V(Uqrshl, uqrshl)                                 \
682   V(Uqshl, uqshl)                                   \
683   V(Uqsub, uqsub)                                   \
684   V(Urshl, urshl)
685 
686 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                          \
687   void MacroAssembler::MASMFN(const ZRegister& zd,                    \
688                               const PRegisterM& pg,                   \
689                               const ZRegister& zn,                    \
690                               const ZRegister& zm) {                  \
691     VIXL_ASSERT(allow_macro_instructions_);                           \
692     NoncommutativeArithmeticHelper(zd,                                \
693                                    pg,                                \
694                                    zn,                                \
695                                    zm,                                \
696                                    static_cast<SVEArithPredicatedFn>( \
697                                        &Assembler::ASMFN),            \
698                                    static_cast<SVEArithPredicatedFn>( \
699                                        &Assembler::ASMFN##r));        \
700   }
701 VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
702 #undef VIXL_DEFINE_MASM_FUNC
703 
704 void MacroAssembler::Fadd(const ZRegister& zd,
705                           const PRegisterM& pg,
706                           const ZRegister& zn,
707                           const ZRegister& zm,
708                           FPMacroNaNPropagationOption nan_option) {
709   VIXL_ASSERT(allow_macro_instructions_);
710   FPCommutativeArithmeticHelper(zd,
711                                 pg,
712                                 zn,
713                                 zm,
714                                 static_cast<SVEArithPredicatedFn>(
715                                     &Assembler::fadd),
716                                 nan_option);
717 }
718 
719 void MacroAssembler::Fabd(const ZRegister& zd,
720                           const PRegisterM& pg,
721                           const ZRegister& zn,
722                           const ZRegister& zm,
723                           FPMacroNaNPropagationOption nan_option) {
724   VIXL_ASSERT(allow_macro_instructions_);
725   FPCommutativeArithmeticHelper(zd,
726                                 pg,
727                                 zn,
728                                 zm,
729                                 static_cast<SVEArithPredicatedFn>(
730                                     &Assembler::fabd),
731                                 nan_option);
732 }
733 
734 void MacroAssembler::Fmul(const ZRegister& zd,
735                           const PRegisterM& pg,
736                           const ZRegister& zn,
737                           const ZRegister& zm,
738                           FPMacroNaNPropagationOption nan_option) {
739   VIXL_ASSERT(allow_macro_instructions_);
740   FPCommutativeArithmeticHelper(zd,
741                                 pg,
742                                 zn,
743                                 zm,
744                                 static_cast<SVEArithPredicatedFn>(
745                                     &Assembler::fmul),
746                                 nan_option);
747 }
748 
749 void MacroAssembler::Fmulx(const ZRegister& zd,
750                            const PRegisterM& pg,
751                            const ZRegister& zn,
752                            const ZRegister& zm,
753                            FPMacroNaNPropagationOption nan_option) {
754   VIXL_ASSERT(allow_macro_instructions_);
755   FPCommutativeArithmeticHelper(zd,
756                                 pg,
757                                 zn,
758                                 zm,
759                                 static_cast<SVEArithPredicatedFn>(
760                                     &Assembler::fmulx),
761                                 nan_option);
762 }
763 
764 void MacroAssembler::Fmax(const ZRegister& zd,
765                           const PRegisterM& pg,
766                           const ZRegister& zn,
767                           const ZRegister& zm,
768                           FPMacroNaNPropagationOption nan_option) {
769   VIXL_ASSERT(allow_macro_instructions_);
770   FPCommutativeArithmeticHelper(zd,
771                                 pg,
772                                 zn,
773                                 zm,
774                                 static_cast<SVEArithPredicatedFn>(
775                                     &Assembler::fmax),
776                                 nan_option);
777 }
778 
779 void MacroAssembler::Fmin(const ZRegister& zd,
780                           const PRegisterM& pg,
781                           const ZRegister& zn,
782                           const ZRegister& zm,
783                           FPMacroNaNPropagationOption nan_option) {
784   VIXL_ASSERT(allow_macro_instructions_);
785   FPCommutativeArithmeticHelper(zd,
786                                 pg,
787                                 zn,
788                                 zm,
789                                 static_cast<SVEArithPredicatedFn>(
790                                     &Assembler::fmin),
791                                 nan_option);
792 }
793 
794 void MacroAssembler::Fmaxnm(const ZRegister& zd,
795                             const PRegisterM& pg,
796                             const ZRegister& zn,
797                             const ZRegister& zm,
798                             FPMacroNaNPropagationOption nan_option) {
799   VIXL_ASSERT(allow_macro_instructions_);
800   FPCommutativeArithmeticHelper(zd,
801                                 pg,
802                                 zn,
803                                 zm,
804                                 static_cast<SVEArithPredicatedFn>(
805                                     &Assembler::fmaxnm),
806                                 nan_option);
807 }
808 
809 void MacroAssembler::Fminnm(const ZRegister& zd,
810                             const PRegisterM& pg,
811                             const ZRegister& zn,
812                             const ZRegister& zm,
813                             FPMacroNaNPropagationOption nan_option) {
814   VIXL_ASSERT(allow_macro_instructions_);
815   FPCommutativeArithmeticHelper(zd,
816                                 pg,
817                                 zn,
818                                 zm,
819                                 static_cast<SVEArithPredicatedFn>(
820                                     &Assembler::fminnm),
821                                 nan_option);
822 }
823 
824 void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
825   VIXL_ASSERT(allow_macro_instructions_);
826 
827   switch (zd.GetLaneSizeInBits()) {
828     case kHRegSize:
829       Fdup(zd, Float16(imm));
830       break;
831     case kSRegSize:
832       Fdup(zd, static_cast<float>(imm));
833       break;
834     case kDRegSize:
835       uint64_t bits = DoubleToRawbits(imm);
836       if (IsImmFP64(bits)) {
837         SingleEmissionCheckScope guard(this);
838         fdup(zd, imm);
839       } else {
840         Dup(zd, bits);
841       }
842       break;
843   }
844 }
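// Illustrative examples (assume a MacroAssembler `masm`):
//
//   masm.Fdup(z0.VnD(), 1.5);     // FP8-encodable: fdup z0.d, #1.5
//   masm.Fdup(z0.VnD(), 1234.5);  // Not encodable; the raw bit pattern is
//                                 // broadcast via Dup (dupm or a scratch
//                                 // general-purpose register).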
845 
846 void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
847   VIXL_ASSERT(allow_macro_instructions_);
848 
849   switch (zd.GetLaneSizeInBits()) {
850     case kHRegSize:
851       Fdup(zd, Float16(imm));
852       break;
853     case kSRegSize:
854       if (IsImmFP32(imm)) {
855         SingleEmissionCheckScope guard(this);
856         fdup(zd, imm);
857       } else {
858         Dup(zd, FloatToRawbits(imm));
859       }
860       break;
861     case kDRegSize:
862       Fdup(zd, static_cast<double>(imm));
863       break;
864   }
865 }
866 
867 void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
868   VIXL_ASSERT(allow_macro_instructions_);
869 
870   switch (zd.GetLaneSizeInBits()) {
871     case kHRegSize:
872       if (IsImmFP16(imm)) {
873         SingleEmissionCheckScope guard(this);
874         fdup(zd, imm);
875       } else {
876         Dup(zd, Float16ToRawbits(imm));
877       }
878       break;
879     case kSRegSize:
880       Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
881       break;
882     case kDRegSize:
883       Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
884       break;
885   }
886 }
887 
888 void MacroAssembler::Index(const ZRegister& zd,
889                            const Operand& start,
890                            const Operand& step) {
891   class IndexOperand : public Operand {
892    public:
893     static IndexOperand Prepare(MacroAssembler* masm,
894                                 UseScratchRegisterScope* temps,
895                                 const Operand& op,
896                                 const ZRegister& zd_inner) {
897       // Look for encodable immediates.
898       int imm;
899       if (op.IsImmediate()) {
900         if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
901           return IndexOperand(imm);
902         }
903         Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
904         masm->Mov(scratch, op);
905         return IndexOperand(scratch);
906       } else {
907         // Plain registers can be encoded directly.
908         VIXL_ASSERT(op.IsPlainRegister());
909         return IndexOperand(op.GetRegister());
910       }
911     }
912 
913     int GetImm5() const {
914       int64_t imm = GetImmediate();
915       VIXL_ASSERT(IsInt5(imm));
916       return static_cast<int>(imm);
917     }
918 
919    private:
920     explicit IndexOperand(const Register& reg) : Operand(reg) {}
921     explicit IndexOperand(int64_t imm) : Operand(imm) {}
922   };
923 
924   UseScratchRegisterScope temps(this);
925   IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
926   IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);
927 
928   SingleEmissionCheckScope guard(this);
929   if (start_enc.IsImmediate()) {
930     if (step_enc.IsImmediate()) {
931       index(zd, start_enc.GetImm5(), step_enc.GetImm5());
932     } else {
933       index(zd, start_enc.GetImm5(), step_enc.GetRegister());
934     }
935   } else {
936     if (step_enc.IsImmediate()) {
937       index(zd, start_enc.GetRegister(), step_enc.GetImm5());
938     } else {
939       index(zd, start_enc.GetRegister(), step_enc.GetRegister());
940     }
941   }
942 }
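// Illustrative examples (assume a MacroAssembler `masm`):
//
//   masm.Index(z0.VnB(), 0, 1);   // Both fit in 5 bits: index z0.b, #0, #1
//   masm.Index(z0.VnH(), 42, 3);  // 42 does not fit in a signed 5-bit field,
//                                 // so it is moved to a scratch W register:
//                                 //   mov <w scratch>, #42
//                                 //   index z0.h, <w scratch>, #3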
943 
944 void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
945   VIXL_ASSERT(allow_macro_instructions_);
946   VIXL_ASSERT(imm.FitsInLane(zdn));
947 
948   if (imm.IsZero()) {
949     SingleEmissionCheckScope guard(this);
950     insr(zdn, xzr);
951     return;
952   }
953 
954   UseScratchRegisterScope temps(this);
955   Register scratch = temps.AcquireRegisterToHoldLane(zdn);
956 
957   // TODO: There are many cases where we could optimise immediates, such as by
958   // detecting repeating patterns or FP immediates. We should optimise and
959   // abstract this for use in other SVE mov-immediate-like macros.
960   Mov(scratch, imm);
961 
962   SingleEmissionCheckScope guard(this);
963   insr(zdn, scratch);
964 }
965 
966 void MacroAssembler::Mla(const ZRegister& zd,
967                          const PRegisterM& pg,
968                          const ZRegister& za,
969                          const ZRegister& zn,
970                          const ZRegister& zm) {
971   VIXL_ASSERT(allow_macro_instructions_);
972   if (zd.Aliases(za)) {
973     // zda = zda + (zn * zm)
974     SingleEmissionCheckScope guard(this);
975     mla(zd, pg, zn, zm);
976   } else if (zd.Aliases(zn)) {
977     // zdn = za + (zdn * zm)
978     SingleEmissionCheckScope guard(this);
979     mad(zd, pg, zm, za);
980   } else if (zd.Aliases(zm)) {
981     // Multiplication is commutative, so we can swap zn and zm.
982     // zdm = za + (zdm * zn)
983     SingleEmissionCheckScope guard(this);
984     mad(zd, pg, zn, za);
985   } else {
986     // zd = za + (zn * zm)
987     ExactAssemblyScope guard(this, 2 * kInstructionSize);
988     movprfx(zd, pg, za);
989     mla(zd, pg, zn, zm);
990   }
991 }
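// Illustrative expansions of the aliasing cases above (assume a MacroAssembler
// `masm`):
//
//   masm.Mla(z0.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS());
//       // zd aliases za:  mla z0.s, p0/m, z1.s, z2.s
//   masm.Mla(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), z2.VnS());
//       // zd aliases zn:  mad z0.s, p0/m, z2.s, z1.s
//   masm.Mla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS(), z3.VnS());
//       // No alias:       movprfx z0.s, p0/m, z1.s
//       //                 mla z0.s, p0/m, z2.s, z3.s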
992 
993 void MacroAssembler::Mls(const ZRegister& zd,
994                          const PRegisterM& pg,
995                          const ZRegister& za,
996                          const ZRegister& zn,
997                          const ZRegister& zm) {
998   VIXL_ASSERT(allow_macro_instructions_);
999   if (zd.Aliases(za)) {
1000     // zda = zda - (zn * zm)
1001     SingleEmissionCheckScope guard(this);
1002     mls(zd, pg, zn, zm);
1003   } else if (zd.Aliases(zn)) {
1004     // zdn = za - (zdn * zm)
1005     SingleEmissionCheckScope guard(this);
1006     msb(zd, pg, zm, za);
1007   } else if (zd.Aliases(zm)) {
1008     // Multiplication is commutative, so we can swap zn and zm.
1009     // zdm = za - (zdm * zn)
1010     SingleEmissionCheckScope guard(this);
1011     msb(zd, pg, zn, za);
1012   } else {
1013     // zd = za - (zn * zm)
1014     ExactAssemblyScope guard(this, 2 * kInstructionSize);
1015     movprfx(zd, pg, za);
1016     mls(zd, pg, zn, zm);
1017   }
1018 }
1019 
1020 void MacroAssembler::CompareHelper(Condition cond,
1021                                    const PRegisterWithLaneSize& pd,
1022                                    const PRegisterZ& pg,
1023                                    const ZRegister& zn,
1024                                    IntegerOperand imm) {
1025   UseScratchRegisterScope temps(this);
1026   ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1027   Dup(zm, imm);
1028   SingleEmissionCheckScope guard(this);
1029   cmp(cond, pd, pg, zn, zm);
1030 }
1031 
1032 void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
1033                             const PRegister& pg,
1034                             const PRegisterWithLaneSize& pn) {
1035   VIXL_ASSERT(allow_macro_instructions_);
1036   VIXL_ASSERT(pd.IsLaneSizeB());
1037   VIXL_ASSERT(pn.IsLaneSizeB());
1038   if (pd.Is(pn)) {
1039     SingleEmissionCheckScope guard(this);
1040     pfirst(pd, pg, pn);
1041   } else {
1042     UseScratchRegisterScope temps(this);
1043     PRegister temp_pg = pg;
1044     if (pd.Aliases(pg)) {
1045       temp_pg = temps.AcquireP();
1046       Mov(temp_pg.VnB(), pg.VnB());
1047     }
1048     Mov(pd, pn);
1049     SingleEmissionCheckScope guard(this);
1050     pfirst(pd, temp_pg, pd);
1051   }
1052 }
1053 
1054 void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
1055                            const PRegister& pg,
1056                            const PRegisterWithLaneSize& pn) {
1057   VIXL_ASSERT(allow_macro_instructions_);
1058   VIXL_ASSERT(AreSameFormat(pd, pn));
1059   if (pd.Is(pn)) {
1060     SingleEmissionCheckScope guard(this);
1061     pnext(pd, pg, pn);
1062   } else {
1063     UseScratchRegisterScope temps(this);
1064     PRegister temp_pg = pg;
1065     if (pd.Aliases(pg)) {
1066       temp_pg = temps.AcquireP();
1067       Mov(temp_pg.VnB(), pg.VnB());
1068     }
1069     Mov(pd.VnB(), pn.VnB());
1070     SingleEmissionCheckScope guard(this);
1071     pnext(pd, temp_pg, pd);
1072   }
1073 }
1074 
1075 void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
1076                            SVEPredicateConstraint pattern,
1077                            FlagsUpdate s) {
1078   VIXL_ASSERT(allow_macro_instructions_);
1079   switch (s) {
1080     case LeaveFlags:
1081       Ptrue(pd, pattern);
1082       return;
1083     case SetFlags:
1084       Ptrues(pd, pattern);
1085       return;
1086   }
1087   VIXL_UNREACHABLE();
1088 }
1089 
1090 void MacroAssembler::Sub(const ZRegister& zd,
1091                          IntegerOperand imm,
1092                          const ZRegister& zm) {
1093   VIXL_ASSERT(allow_macro_instructions_);
1094 
1095   int imm8;
1096   int shift = -1;
1097   if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
1098       imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
1099     MovprfxHelperScope guard(this, zd, zm);
1100     subr(zd, zd, imm8, shift);
1101   } else {
1102     UseScratchRegisterScope temps(this);
1103     ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
1104     Dup(scratch, imm);
1105 
1106     SingleEmissionCheckScope guard(this);
1107     sub(zd, scratch, zm);
1108   }
1109 }
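// Illustrative examples for the reversed-operand Sub above (immediate minus
// vector; assume a MacroAssembler `masm`):
//
//   masm.Sub(z0.VnS(), 100, z1.VnS());   // movprfx z0, z1
//                                        // subr z0.s, z0.s, #100
//   masm.Sub(z0.VnS(), 1000, z1.VnS());  // #1000 is not encodable, so it is
//                                        // broadcast into a scratch Z register
//                                        // and a vector sub is used:
//                                        //   sub z0.s, <zscratch>.s, z1.s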
1110 
1111 void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
1112                                                const PRegisterZ& pg,
1113                                                const SVEMemOperand& addr,
1114                                                SVELoadBroadcastFn fn,
1115                                                int divisor) {
1116   VIXL_ASSERT(addr.IsScalarPlusImmediate());
1117   int64_t imm = addr.GetImmediateOffset();
1118   if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
1119     SingleEmissionCheckScope guard(this);
1120     (this->*fn)(zt, pg, addr);
1121   } else {
1122     UseScratchRegisterScope temps(this);
1123     Register scratch = temps.AcquireX();
1124     CalculateSVEAddress(scratch, addr, zt);
1125     SingleEmissionCheckScope guard(this);
1126     (this->*fn)(zt, pg, SVEMemOperand(scratch));
1127   }
1128 }
1129 
1130 void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
1131                                                  const SVEMemOperand& addr,
1132                                                  SVELoadStoreFn fn) {
1133   VIXL_ASSERT(allow_macro_instructions_);
1134   VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());
1135 
1136   if (addr.IsPlainScalar() ||
1137       (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
1138        addr.IsMulVl())) {
1139     SingleEmissionCheckScope guard(this);
1140     (this->*fn)(rt, addr);
1141     return;
1142   }
1143 
1144   if (addr.IsEquivalentToScalar()) {
1145     SingleEmissionCheckScope guard(this);
1146     (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
1147     return;
1148   }
1149 
1150   UseScratchRegisterScope temps(this);
1151   Register scratch = temps.AcquireX();
1152   CalculateSVEAddress(scratch, addr, rt);
1153   SingleEmissionCheckScope guard(this);
1154   (this->*fn)(rt, SVEMemOperand(scratch));
1155 }
1156 
1157 template <typename Tg, typename Tf>
1158 void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
1159     const ZRegister& zt,
1160     const Tg& pg,
1161     const SVEMemOperand& addr,
1162     Tf fn,
1163     int imm_bits,
1164     int shift_amount,
1165     SVEOffsetModifier supported_modifier,
1166     int vl_divisor_log2) {
1167   VIXL_ASSERT(allow_macro_instructions_);
1168   int imm_divisor = 1 << shift_amount;
1169 
1170   if (addr.IsPlainScalar() ||
1171       (addr.IsScalarPlusImmediate() &&
1172        IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
1173        ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
1174        (addr.GetOffsetModifier() == supported_modifier))) {
1175     SingleEmissionCheckScope guard(this);
1176     (this->*fn)(zt, pg, addr);
1177     return;
1178   }
1179 
1180   if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1181       addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
1182     SingleEmissionCheckScope guard(this);
1183     (this->*fn)(zt, pg, addr);
1184     return;
1185   }
1186 
1187   if (addr.IsEquivalentToScalar()) {
1188     SingleEmissionCheckScope guard(this);
1189     (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1190     return;
1191   }
1192 
1193   if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
1194       (vl_divisor_log2 == -1)) {
1195     // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL
1196     // dependent.
1197     VIXL_UNIMPLEMENTED();
1198   }
1199 
1200   UseScratchRegisterScope temps(this);
1201   Register scratch = temps.AcquireX();
1202   CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1203   SingleEmissionCheckScope guard(this);
1204   (this->*fn)(zt, pg, SVEMemOperand(scratch));
1205 }
1206 
1207 template <typename Tg, typename Tf>
1208 void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
1209                                          const ZRegister& zt,
1210                                          const Tg& pg,
1211                                          const SVEMemOperand& addr,
1212                                          Tf fn) {
1213   if (addr.IsPlainScalar() ||
1214       (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1215        addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
1216       (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
1217        addr.IsMulVl())) {
1218     SingleEmissionCheckScope guard(this);
1219     (this->*fn)(zt, pg, addr);
1220     return;
1221   }
1222 
1223   if (addr.IsEquivalentToScalar()) {
1224     SingleEmissionCheckScope guard(this);
1225     (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1226     return;
1227   }
1228 
1229   if (addr.IsVectorPlusImmediate()) {
1230     uint64_t offset = addr.GetImmediateOffset();
1231     if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
1232         IsUint5(offset >> msize_in_bytes_log2)) {
1233       SingleEmissionCheckScope guard(this);
1234       (this->*fn)(zt, pg, addr);
1235       return;
1236     }
1237   }
1238 
1239   if (addr.IsScalarPlusVector()) {
1240     VIXL_ASSERT(addr.IsScatterGather());
1241     SingleEmissionCheckScope guard(this);
1242     (this->*fn)(zt, pg, addr);
1243     return;
1244   }
1245 
1246   UseScratchRegisterScope temps(this);
1247   if (addr.IsScatterGather()) {
1248     // In scatter-gather modes, zt and zn/zm have the same lane size. However,
1249     // for 32-bit accesses, the result of each lane's address calculation still
1250     // requires 64 bits; we can't naively use `Adr` for the address calculation
1251     // because it would truncate each address to 32 bits.
1252 
1253     if (addr.IsVectorPlusImmediate()) {
1254       // Synthesise the immediate in an X register, then use a
1255       // scalar-plus-vector access with the original vector.
1256       Register scratch = temps.AcquireX();
1257       Mov(scratch, addr.GetImmediateOffset());
1258       SingleEmissionCheckScope guard(this);
1259       SVEOffsetModifier om =
1260           zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
1261       (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
1262       return;
1263     }
1264 
1265     VIXL_UNIMPLEMENTED();
1266   } else {
1267     Register scratch = temps.AcquireX();
1268     // TODO: If we have an immediate offset that is a multiple of
1269     // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
1270     // save an instruction.
1271     int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
1272     CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1273     SingleEmissionCheckScope guard(this);
1274     (this->*fn)(zt, pg, SVEMemOperand(scratch));
1275   }
1276 }
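// Illustrative sketch (assumes a MacroAssembler `masm`; uses Ld1w, defined
// later in this file, and approximate SVEMemOperand spellings):
//
//   masm.Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 3, SVE_MUL_VL));
//       // The offset fits in 4 bits:
//       //   ld1w { z0.s }, p0/z, [x0, #3, mul vl]
//   masm.Ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 100, SVE_MUL_VL));
//       // Out of range; the address is first computed into a scratch X
//       // register (via CalculateSVEAddress) and a plain [<scratch>] form is
//       // used.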
1277 
1278 template <typename Tf>
1279 void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
1280                                      const ZRegister& zt,
1281                                      const PRegisterZ& pg,
1282                                      const SVEMemOperand& addr,
1283                                      Tf fn) {
1284   if (addr.IsScatterGather()) {
1285     // Scatter-gather first-fault loads share encodings with normal loads.
1286     SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
1287     return;
1288   }
1289 
1290   // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
1291   // so we don't do immediate synthesis.
1292 
1293   // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
1294   // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
1295   if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
1296                                addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
1297     SingleEmissionCheckScope guard(this);
1298     (this->*fn)(zt, pg, addr);
1299     return;
1300   }
1301 
1302   VIXL_UNIMPLEMENTED();
1303 }
1304 
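// Illustrative sketch (`masm`, z0, p0, x0 and x1 are assumed names):
// contiguous first-fault loads only accept the plain-scalar and
// scalar-plus-scalar forms handled above, e.g.
//
//   masm.Ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));      // [x0]
//   masm.Ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));  // [x0, x1]
//
// Any other contiguous form currently reaches VIXL_UNIMPLEMENTED().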
1305 void MacroAssembler::Ld1b(const ZRegister& zt,
1306                           const PRegisterZ& pg,
1307                           const SVEMemOperand& addr) {
1308   VIXL_ASSERT(allow_macro_instructions_);
1309   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1310                       zt,
1311                       pg,
1312                       addr,
1313                       static_cast<SVELoad1Fn>(&Assembler::ld1b));
1314 }
1315 
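// Illustrative usage sketch for Ld1b above and the other Ld1* wrappers below
// (`masm`, z0, p0, x0 and x1 are assumed names):
//
//   masm.Ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0));                // [x0]
//   masm.Ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));            // [x0, x1]
//   masm.Ld1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 1, SVE_MUL_VL)); // [x0, #1, mul vl]
//
// Operands that have no direct encoding are rewritten by SVELoadStore1Helper
// using a scratch register, so callers need not range-check offsets.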
1316 void MacroAssembler::Ld1h(const ZRegister& zt,
1317                           const PRegisterZ& pg,
1318                           const SVEMemOperand& addr) {
1319   VIXL_ASSERT(allow_macro_instructions_);
1320   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1321                       zt,
1322                       pg,
1323                       addr,
1324                       static_cast<SVELoad1Fn>(&Assembler::ld1h));
1325 }
1326 
1327 void MacroAssembler::Ld1w(const ZRegister& zt,
1328                           const PRegisterZ& pg,
1329                           const SVEMemOperand& addr) {
1330   VIXL_ASSERT(allow_macro_instructions_);
1331   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1332                       zt,
1333                       pg,
1334                       addr,
1335                       static_cast<SVELoad1Fn>(&Assembler::ld1w));
1336 }
1337 
1338 void MacroAssembler::Ld1d(const ZRegister& zt,
1339                           const PRegisterZ& pg,
1340                           const SVEMemOperand& addr) {
1341   VIXL_ASSERT(allow_macro_instructions_);
1342   SVELoadStore1Helper(kDRegSizeInBytesLog2,
1343                       zt,
1344                       pg,
1345                       addr,
1346                       static_cast<SVELoad1Fn>(&Assembler::ld1d));
1347 }
1348 
1349 void MacroAssembler::Ld1sb(const ZRegister& zt,
1350                            const PRegisterZ& pg,
1351                            const SVEMemOperand& addr) {
1352   VIXL_ASSERT(allow_macro_instructions_);
1353   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1354                       zt,
1355                       pg,
1356                       addr,
1357                       static_cast<SVELoad1Fn>(&Assembler::ld1sb));
1358 }
1359 
1360 void MacroAssembler::Ld1sh(const ZRegister& zt,
1361                            const PRegisterZ& pg,
1362                            const SVEMemOperand& addr) {
1363   VIXL_ASSERT(allow_macro_instructions_);
1364   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1365                       zt,
1366                       pg,
1367                       addr,
1368                       static_cast<SVELoad1Fn>(&Assembler::ld1sh));
1369 }
1370 
1371 void MacroAssembler::Ld1sw(const ZRegister& zt,
1372                            const PRegisterZ& pg,
1373                            const SVEMemOperand& addr) {
1374   VIXL_ASSERT(allow_macro_instructions_);
1375   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1376                       zt,
1377                       pg,
1378                       addr,
1379                       static_cast<SVELoad1Fn>(&Assembler::ld1sw));
1380 }
1381 
1382 void MacroAssembler::St1b(const ZRegister& zt,
1383                           const PRegister& pg,
1384                           const SVEMemOperand& addr) {
1385   VIXL_ASSERT(allow_macro_instructions_);
1386   SVELoadStore1Helper(kBRegSizeInBytesLog2,
1387                       zt,
1388                       pg,
1389                       addr,
1390                       static_cast<SVEStore1Fn>(&Assembler::st1b));
1391 }
1392 
1393 void MacroAssembler::St1h(const ZRegister& zt,
1394                           const PRegister& pg,
1395                           const SVEMemOperand& addr) {
1396   VIXL_ASSERT(allow_macro_instructions_);
1397   SVELoadStore1Helper(kHRegSizeInBytesLog2,
1398                       zt,
1399                       pg,
1400                       addr,
1401                       static_cast<SVEStore1Fn>(&Assembler::st1h));
1402 }
1403 
1404 void MacroAssembler::St1w(const ZRegister& zt,
1405                           const PRegister& pg,
1406                           const SVEMemOperand& addr) {
1407   VIXL_ASSERT(allow_macro_instructions_);
1408   SVELoadStore1Helper(kSRegSizeInBytesLog2,
1409                       zt,
1410                       pg,
1411                       addr,
1412                       static_cast<SVEStore1Fn>(&Assembler::st1w));
1413 }
1414 
1415 void MacroAssembler::St1d(const ZRegister& zt,
1416                           const PRegister& pg,
1417                           const SVEMemOperand& addr) {
1418   VIXL_ASSERT(allow_macro_instructions_);
1419   SVELoadStore1Helper(kDRegSizeInBytesLog2,
1420                       zt,
1421                       pg,
1422                       addr,
1423                       static_cast<SVEStore1Fn>(&Assembler::st1d));
1424 }
1425 
1426 void MacroAssembler::Ldff1b(const ZRegister& zt,
1427                             const PRegisterZ& pg,
1428                             const SVEMemOperand& addr) {
1429   VIXL_ASSERT(allow_macro_instructions_);
1430   SVELoadFFHelper(kBRegSizeInBytesLog2,
1431                   zt,
1432                   pg,
1433                   addr,
1434                   static_cast<SVELoad1Fn>(&Assembler::ldff1b));
1435 }
1436 
1437 void MacroAssembler::Ldff1h(const ZRegister& zt,
1438                             const PRegisterZ& pg,
1439                             const SVEMemOperand& addr) {
1440   VIXL_ASSERT(allow_macro_instructions_);
1441   SVELoadFFHelper(kHRegSizeInBytesLog2,
1442                   zt,
1443                   pg,
1444                   addr,
1445                   static_cast<SVELoad1Fn>(&Assembler::ldff1h));
1446 }
1447 
1448 void MacroAssembler::Ldff1w(const ZRegister& zt,
1449                             const PRegisterZ& pg,
1450                             const SVEMemOperand& addr) {
1451   VIXL_ASSERT(allow_macro_instructions_);
1452   SVELoadFFHelper(kSRegSizeInBytesLog2,
1453                   zt,
1454                   pg,
1455                   addr,
1456                   static_cast<SVELoad1Fn>(&Assembler::ldff1w));
1457 }
1458 
1459 void MacroAssembler::Ldff1d(const ZRegister& zt,
1460                             const PRegisterZ& pg,
1461                             const SVEMemOperand& addr) {
1462   VIXL_ASSERT(allow_macro_instructions_);
1463   SVELoadFFHelper(kDRegSizeInBytesLog2,
1464                   zt,
1465                   pg,
1466                   addr,
1467                   static_cast<SVELoad1Fn>(&Assembler::ldff1d));
1468 }
1469 
1470 void MacroAssembler::Ldff1sb(const ZRegister& zt,
1471                              const PRegisterZ& pg,
1472                              const SVEMemOperand& addr) {
1473   VIXL_ASSERT(allow_macro_instructions_);
1474   SVELoadFFHelper(kBRegSizeInBytesLog2,
1475                   zt,
1476                   pg,
1477                   addr,
1478                   static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
1479 }
1480 
1481 void MacroAssembler::Ldff1sh(const ZRegister& zt,
1482                              const PRegisterZ& pg,
1483                              const SVEMemOperand& addr) {
1484   VIXL_ASSERT(allow_macro_instructions_);
1485   SVELoadFFHelper(kHRegSizeInBytesLog2,
1486                   zt,
1487                   pg,
1488                   addr,
1489                   static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
1490 }
1491 
1492 void MacroAssembler::Ldff1sw(const ZRegister& zt,
1493                              const PRegisterZ& pg,
1494                              const SVEMemOperand& addr) {
1495   VIXL_ASSERT(allow_macro_instructions_);
1496   SVELoadFFHelper(kSRegSizeInBytesLog2,
1497                   zt,
1498                   pg,
1499                   addr,
1500                   static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
1501 }
1502 
1503 #define VIXL_SVE_LD1R_LIST(V) \
1504   V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
1505 
1506 #define VIXL_DEFINE_MASM_FUNC(SZ, SH)                          \
1507   void MacroAssembler::Ld1r##SZ(const ZRegister& zt,           \
1508                                 const PRegisterZ& pg,          \
1509                                 const SVEMemOperand& addr) {   \
1510     VIXL_ASSERT(allow_macro_instructions_);                    \
1511     SVELoadStoreNTBroadcastQOHelper(zt,                        \
1512                                     pg,                        \
1513                                     addr,                      \
1514                                     &MacroAssembler::ld1r##SZ, \
1515                                     4,                         \
1516                                     SH,                        \
1517                                     NO_SVE_OFFSET_MODIFIER,    \
1518                                     -1);                       \
1519   }
1520 
1521 VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
1522 
1523 #undef VIXL_DEFINE_MASM_FUNC
1524 #undef VIXL_SVE_LD1R_LIST
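// For reference, VIXL_DEFINE_MASM_FUNC(qb, 4) above expands to roughly:
//
//   void MacroAssembler::Ld1rqb(const ZRegister& zt,
//                               const PRegisterZ& pg,
//                               const SVEMemOperand& addr) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ld1rqb,
//                                     4, 4, NO_SVE_OFFSET_MODIFIER, -1);
//   }
//
// i.e. an LD1RQB whose 4-bit immediate offset is scaled by 16 bytes.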
1525 
1526 void MacroAssembler::Ldnt1b(const ZRegister& zt,
1527                             const PRegisterZ& pg,
1528                             const SVEMemOperand& addr) {
1529   VIXL_ASSERT(allow_macro_instructions_);
1530   if (addr.IsVectorPlusScalar()) {
1531     SingleEmissionCheckScope guard(this);
1532     ldnt1b(zt, pg, addr);
1533   } else {
1534     SVELoadStoreNTBroadcastQOHelper(zt,
1535                                     pg,
1536                                     addr,
1537                                     &MacroAssembler::ldnt1b,
1538                                     4,
1539                                     0,
1540                                     SVE_MUL_VL);
1541   }
1542 }
1543 
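// Illustrative sketch (`masm`, z0, z1, p0 and x0 are assumed names): the SVE2
// vector-plus-scalar form of a non-temporal load, e.g.
//
//   masm.Ldnt1b(z0.VnD(), p0.Zeroing(), SVEMemOperand(z1.VnD(), x0));
//
// is emitted directly above, while scalar-plus-immediate forms such as
//
//   masm.Ldnt1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));
//
// go through SVELoadStoreNTBroadcastQOHelper.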
1544 void MacroAssembler::Ldnt1d(const ZRegister& zt,
1545                             const PRegisterZ& pg,
1546                             const SVEMemOperand& addr) {
1547   VIXL_ASSERT(allow_macro_instructions_);
1548   if (addr.IsVectorPlusScalar()) {
1549     SingleEmissionCheckScope guard(this);
1550     ldnt1d(zt, pg, addr);
1551   } else {
1552     SVELoadStoreNTBroadcastQOHelper(zt,
1553                                     pg,
1554                                     addr,
1555                                     &MacroAssembler::ldnt1d,
1556                                     4,
1557                                     0,
1558                                     SVE_MUL_VL);
1559   }
1560 }
1561 
1562 void MacroAssembler::Ldnt1h(const ZRegister& zt,
1563                             const PRegisterZ& pg,
1564                             const SVEMemOperand& addr) {
1565   VIXL_ASSERT(allow_macro_instructions_);
1566   if (addr.IsVectorPlusScalar()) {
1567     SingleEmissionCheckScope guard(this);
1568     ldnt1h(zt, pg, addr);
1569   } else {
1570     SVELoadStoreNTBroadcastQOHelper(zt,
1571                                     pg,
1572                                     addr,
1573                                     &MacroAssembler::ldnt1h,
1574                                     4,
1575                                     0,
1576                                     SVE_MUL_VL);
1577   }
1578 }
1579 
1580 void MacroAssembler::Ldnt1w(const ZRegister& zt,
1581                             const PRegisterZ& pg,
1582                             const SVEMemOperand& addr) {
1583   VIXL_ASSERT(allow_macro_instructions_);
1584   if (addr.IsVectorPlusScalar()) {
1585     SingleEmissionCheckScope guard(this);
1586     ldnt1w(zt, pg, addr);
1587   } else {
1588     SVELoadStoreNTBroadcastQOHelper(zt,
1589                                     pg,
1590                                     addr,
1591                                     &MacroAssembler::ldnt1w,
1592                                     4,
1593                                     0,
1594                                     SVE_MUL_VL);
1595   }
1596 }
1597 
1598 void MacroAssembler::Stnt1b(const ZRegister& zt,
1599                             const PRegister& pg,
1600                             const SVEMemOperand& addr) {
1601   VIXL_ASSERT(allow_macro_instructions_);
1602   if (addr.IsVectorPlusScalar()) {
1603     SingleEmissionCheckScope guard(this);
1604     stnt1b(zt, pg, addr);
1605   } else {
1606     SVELoadStoreNTBroadcastQOHelper(zt,
1607                                     pg,
1608                                     addr,
1609                                     &MacroAssembler::stnt1b,
1610                                     4,
1611                                     0,
1612                                     SVE_MUL_VL);
1613   }
1614 }
1615 void MacroAssembler::Stnt1d(const ZRegister& zt,
1616                             const PRegister& pg,
1617                             const SVEMemOperand& addr) {
1618   VIXL_ASSERT(allow_macro_instructions_);
1619   if (addr.IsVectorPlusScalar()) {
1620     SingleEmissionCheckScope guard(this);
1621     stnt1d(zt, pg, addr);
1622   } else {
1623     SVELoadStoreNTBroadcastQOHelper(zt,
1624                                     pg,
1625                                     addr,
1626                                     &MacroAssembler::stnt1d,
1627                                     4,
1628                                     0,
1629                                     SVE_MUL_VL);
1630   }
1631 }
1632 void MacroAssembler::Stnt1h(const ZRegister& zt,
1633                             const PRegister& pg,
1634                             const SVEMemOperand& addr) {
1635   VIXL_ASSERT(allow_macro_instructions_);
1636   if (addr.IsVectorPlusScalar()) {
1637     SingleEmissionCheckScope guard(this);
1638     stnt1h(zt, pg, addr);
1639   } else {
1640     SVELoadStoreNTBroadcastQOHelper(zt,
1641                                     pg,
1642                                     addr,
1643                                     &MacroAssembler::stnt1h,
1644                                     4,
1645                                     0,
1646                                     SVE_MUL_VL);
1647   }
1648 }
1649 void MacroAssembler::Stnt1w(const ZRegister& zt,
1650                             const PRegister& pg,
1651                             const SVEMemOperand& addr) {
1652   VIXL_ASSERT(allow_macro_instructions_);
1653   if (addr.IsVectorPlusScalar()) {
1654     SingleEmissionCheckScope guard(this);
1655     stnt1w(zt, pg, addr);
1656   } else {
1657     SVELoadStoreNTBroadcastQOHelper(zt,
1658                                     pg,
1659                                     addr,
1660                                     &MacroAssembler::stnt1w,
1661                                     4,
1662                                     0,
1663                                     SVE_MUL_VL);
1664   }
1665 }
1666 
1667 void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
1668                                        const ZRegister& zd,
1669                                        const ZRegister& za,
1670                                        const ZRegister& zn,
1671                                        const ZRegister& zm,
1672                                        int index) {
1673   if (zd.Aliases(za)) {
1674     // zda = zda + (zn . zm)
1675     SingleEmissionCheckScope guard(this);
1676     (this->*fn)(zd, zn, zm, index);
1677 
1678   } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
1679     // zdn = za + (zdn . zm[index])
1680     // zdm = za + (zn . zdm[index])
1681     // zdnm = za + (zdnm . zdnm[index])
1682     UseScratchRegisterScope temps(this);
1683     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1684     {
1685       MovprfxHelperScope guard(this, scratch, za);
1686       (this->*fn)(scratch, zn, zm, index);
1687     }
1688 
1689     Mov(zd, scratch);
1690   } else {
1691     // zd = za + (zn . zm)
1692     MovprfxHelperScope guard(this, zd, za);
1693     (this->*fn)(zd, zn, zm, index);
1694   }
1695 }
1696 
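// Illustrative sketch of the aliasing rules above (`masm`, z0, z1 and z2 are
// assumed names): when the destination aliases the accumulator, e.g.
//
//   masm.Sdot(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB(), 0);
//
// a single indexed sdot is emitted. When it instead aliases a multiplicand,
// e.g.
//
//   masm.Sdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 0);
//
// the result is built in a scratch Z register (movprfx from z1, then sdot)
// and copied back to z0 with Mov.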
1697 void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
1698                                               const ZRegister& zd,
1699                                               const ZRegister& za,
1700                                               const ZRegister& zn,
1701                                               const ZRegister& zm) {
1702   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1703     // zd = za . zd . zm
1704     // zd = za . zn . zd
1705     // zd = za . zd . zd
1706     UseScratchRegisterScope temps(this);
1707     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1708     {
1709       MovprfxHelperScope guard(this, scratch, za);
1710       (this->*fn)(scratch, zn, zm);
1711     }
1712 
1713     Mov(zd, scratch);
1714   } else {
1715     MovprfxHelperScope guard(this, zd, za);
1716     (this->*fn)(zd, zn, zm);
1717   }
1718 }
1719 
1720 void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
1721                                               const ZRegister& zd,
1722                                               const ZRegister& za,
1723                                               const ZRegister& zn,
1724                                               const ZRegister& zm) {
1725   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1726     // zd = za . zd . zm
1727     // zd = za . zn . zd
1728     // zd = za . zd . zd
1729     UseScratchRegisterScope temps(this);
1730     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1731     {
1732       MovprfxHelperScope guard(this, scratch, za);
1733       (this->*fn)(scratch, scratch, zn, zm);
1734     }
1735 
1736     Mov(zd, scratch);
1737   } else {
1738     MovprfxHelperScope guard(this, zd, za);
1739     (this->*fn)(zd, zd, zn, zm);
1740   }
1741 }
1742 
1743 void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
1744                                                     const ZRegister& zd,
1745                                                     const ZRegister& za,
1746                                                     const ZRegister& zn,
1747                                                     const ZRegister& zm,
1748                                                     int imm) {
1749   if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
1750     // zd = za . zd . zm[i]
1751     // zd = za . zn . zd[i]
1752     // zd = za . zd . zd[i]
1753     UseScratchRegisterScope temps(this);
1754     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1755     {
1756       MovprfxHelperScope guard(this, scratch, za);
1757       (this->*fn)(scratch, zn, zm, imm);
1758     }
1759 
1760     Mov(zd, scratch);
1761   } else {
1762     // zd = za . zn . zm[i]
1763     MovprfxHelperScope guard(this, zd, za);
1764     (this->*fn)(zd, zn, zm, imm);
1765   }
1766 }
1767 
1768 void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
1769                                                   const ZRegister& zd,
1770                                                   const ZRegister& za,
1771                                                   const ZRegister& zn,
1772                                                   const ZRegister& zm) {
1773   if (zn.Aliases(zm)) {
1774     // If zn == zm, the difference is zero.
1775     if (!zd.Aliases(za)) {
1776       Mov(zd, za);
1777     }
1778   } else if (zd.Aliases(za)) {
1779     SingleEmissionCheckScope guard(this);
1780     (this->*fn)(zd, zn, zm);
1781   } else if (zd.Aliases(zn)) {
1782     UseScratchRegisterScope temps(this);
1783     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1784     Mov(ztmp, zn);
1785     MovprfxHelperScope guard(this, zd, za);
1786     (this->*fn)(zd, ztmp, zm);
1787   } else if (zd.Aliases(zm)) {
1788     UseScratchRegisterScope temps(this);
1789     ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1790     Mov(ztmp, zm);
1791     MovprfxHelperScope guard(this, zd, za);
1792     (this->*fn)(zd, zn, ztmp);
1793   } else {
1794     MovprfxHelperScope guard(this, zd, za);
1795     (this->*fn)(zd, zn, zm);
1796   }
1797 }
1798 
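// Illustrative sketch (`masm`, z0..z3 are assumed names): because the
// absolute difference of a register with itself is zero,
//
//   masm.Saba(z0.VnB(), z0.VnB(), z1.VnB(), z1.VnB());
//
// emits nothing at all, while the fully non-aliasing case
//
//   masm.Saba(z0.VnB(), z1.VnB(), z2.VnB(), z3.VnB());
//
// becomes "movprfx z0, z1" followed by "saba z0.b, z2.b, z3.b".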
1799 #define VIXL_SVE_4REG_LIST(V)                       \
1800   V(Saba, saba, AbsoluteDifferenceAccumulate)       \
1801   V(Uaba, uaba, AbsoluteDifferenceAccumulate)       \
1802   V(Sabalb, sabalb, AbsoluteDifferenceAccumulate)   \
1803   V(Sabalt, sabalt, AbsoluteDifferenceAccumulate)   \
1804   V(Uabalb, uabalb, AbsoluteDifferenceAccumulate)   \
1805   V(Uabalt, uabalt, AbsoluteDifferenceAccumulate)   \
1806   V(Sdot, sdot, FourRegDestructiveHelper)           \
1807   V(Udot, udot, FourRegDestructiveHelper)           \
1808   V(Adclb, adclb, FourRegDestructiveHelper)         \
1809   V(Adclt, adclt, FourRegDestructiveHelper)         \
1810   V(Sbclb, sbclb, FourRegDestructiveHelper)         \
1811   V(Sbclt, sbclt, FourRegDestructiveHelper)         \
1812   V(Smlalb, smlalb, FourRegDestructiveHelper)       \
1813   V(Smlalt, smlalt, FourRegDestructiveHelper)       \
1814   V(Smlslb, smlslb, FourRegDestructiveHelper)       \
1815   V(Smlslt, smlslt, FourRegDestructiveHelper)       \
1816   V(Umlalb, umlalb, FourRegDestructiveHelper)       \
1817   V(Umlalt, umlalt, FourRegDestructiveHelper)       \
1818   V(Umlslb, umlslb, FourRegDestructiveHelper)       \
1819   V(Umlslt, umlslt, FourRegDestructiveHelper)       \
1820   V(Bcax, bcax, FourRegDestructiveHelper)           \
1821   V(Bsl, bsl, FourRegDestructiveHelper)             \
1822   V(Bsl1n, bsl1n, FourRegDestructiveHelper)         \
1823   V(Bsl2n, bsl2n, FourRegDestructiveHelper)         \
1824   V(Eor3, eor3, FourRegDestructiveHelper)           \
1825   V(Nbsl, nbsl, FourRegDestructiveHelper)           \
1826   V(Fmlalb, fmlalb, FourRegDestructiveHelper)       \
1827   V(Fmlalt, fmlalt, FourRegDestructiveHelper)       \
1828   V(Fmlslb, fmlslb, FourRegDestructiveHelper)       \
1829   V(Fmlslt, fmlslt, FourRegDestructiveHelper)       \
1830   V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper)   \
1831   V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
1832   V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper)   \
1833   V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper)   \
1834   V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
1835   V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper)   \
1836   V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper)   \
1837   V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper)   \
1838   V(Fmmla, fmmla, FourRegDestructiveHelper)         \
1839   V(Smmla, smmla, FourRegDestructiveHelper)         \
1840   V(Ummla, ummla, FourRegDestructiveHelper)         \
1841   V(Usmmla, usmmla, FourRegDestructiveHelper)       \
1842   V(Usdot, usdot, FourRegDestructiveHelper)
1843 
1844 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1845   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1846                               const ZRegister& za,   \
1847                               const ZRegister& zn,   \
1848                               const ZRegister& zm) { \
1849     VIXL_ASSERT(allow_macro_instructions_);          \
1850     HELPER(&Assembler::ASMFN, zd, za, zn, zm);       \
1851   }
1852 VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
1853 #undef VIXL_DEFINE_MASM_FUNC
1854 
1855 #define VIXL_SVE_4REG_1IMM_LIST(V)                      \
1856   V(Fmla, fmla, FourRegOneImmDestructiveHelper)         \
1857   V(Fmls, fmls, FourRegOneImmDestructiveHelper)         \
1858   V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper)     \
1859   V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper)     \
1860   V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper)     \
1861   V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper)     \
1862   V(Mla, mla, FourRegOneImmDestructiveHelper)           \
1863   V(Mls, mls, FourRegOneImmDestructiveHelper)           \
1864   V(Smlalb, smlalb, FourRegOneImmDestructiveHelper)     \
1865   V(Smlalt, smlalt, FourRegOneImmDestructiveHelper)     \
1866   V(Smlslb, smlslb, FourRegOneImmDestructiveHelper)     \
1867   V(Smlslt, smlslt, FourRegOneImmDestructiveHelper)     \
1868   V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
1869   V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
1870   V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
1871   V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
1872   V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
1873   V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
1874   V(Umlalb, umlalb, FourRegOneImmDestructiveHelper)     \
1875   V(Umlalt, umlalt, FourRegOneImmDestructiveHelper)     \
1876   V(Umlslb, umlslb, FourRegOneImmDestructiveHelper)     \
1877   V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)
1878 
1879 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
1880   void MacroAssembler::MASMFN(const ZRegister& zd,   \
1881                               const ZRegister& za,   \
1882                               const ZRegister& zn,   \
1883                               const ZRegister& zm,   \
1884                               int imm) {             \
1885     VIXL_ASSERT(allow_macro_instructions_);          \
1886     HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm);  \
1887   }
1888 VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
1889 #undef VIXL_DEFINE_MASM_FUNC
1890 
1891 void MacroAssembler::Sdot(const ZRegister& zd,
1892                           const ZRegister& za,
1893                           const ZRegister& zn,
1894                           const ZRegister& zm,
1895                           int index) {
1896   VIXL_ASSERT(allow_macro_instructions_);
1897   SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
1898 }
1899 
1900 void MacroAssembler::Udot(const ZRegister& zd,
1901                           const ZRegister& za,
1902                           const ZRegister& zn,
1903                           const ZRegister& zm,
1904                           int index) {
1905   VIXL_ASSERT(allow_macro_instructions_);
1906   SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
1907 }
1908 
1909 void MacroAssembler::Sudot(const ZRegister& zd,
1910                            const ZRegister& za,
1911                            const ZRegister& zn,
1912                            const ZRegister& zm,
1913                            int index) {
1914   VIXL_ASSERT(allow_macro_instructions_);
1915   SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
1916 }
1917 
1918 void MacroAssembler::Usdot(const ZRegister& zd,
1919                            const ZRegister& za,
1920                            const ZRegister& zn,
1921                            const ZRegister& zm,
1922                            int index) {
1923   VIXL_ASSERT(allow_macro_instructions_);
1924   SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
1925 }
1926 
1927 void MacroAssembler::Cdot(const ZRegister& zd,
1928                           const ZRegister& za,
1929                           const ZRegister& zn,
1930                           const ZRegister& zm,
1931                           int index,
1932                           int rot) {
1933   // This doesn't handle zm when it is outside the encodable range, which
1934   // depends on the element size: z0-z7 for B, z0-z15 for H.
1935   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1936     UseScratchRegisterScope temps(this);
1937     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
1938     {
1939       MovprfxHelperScope guard(this, ztmp, za);
1940       cdot(ztmp, zn, zm, index, rot);
1941     }
1942     Mov(zd, ztmp);
1943   } else {
1944     MovprfxHelperScope guard(this, zd, za);
1945     cdot(zd, zn, zm, index, rot);
1946   }
1947 }
1948 
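// Illustrative sketch (`masm`, z0, z1 and z2 are assumed names): an indexed
// complex dot product such as
//
//   masm.Cdot(z0.VnS(), z0.VnS(), z1.VnB(), z2.VnB(), 1, 90);
//
// maps onto a single cdot, because the destination aliases the accumulator
// and zm (z2) is within the encodable range for B-lane sources (z0-z7).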
1949 void MacroAssembler::Cdot(const ZRegister& zd,
1950                           const ZRegister& za,
1951                           const ZRegister& zn,
1952                           const ZRegister& zm,
1953                           int rot) {
1954   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
1955     UseScratchRegisterScope temps(this);
1956     VIXL_ASSERT(AreSameLaneSize(zn, zm));
1957     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
1958     Mov(ztmp, zd.Aliases(zn) ? zn : zm);
1959     MovprfxHelperScope guard(this, zd, za);
1960     cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
1961   } else {
1962     MovprfxHelperScope guard(this, zd, za);
1963     cdot(zd, zn, zm, rot);
1964   }
1965 }
1966 
1967 void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
1968                                     const PRegisterM& pg,
1969                                     const ZRegister& za,
1970                                     const ZRegister& zn,
1971                                     const ZRegister& zm,
1972                                     SVEMulAddPredicatedZdaFn fn_zda,
1973                                     SVEMulAddPredicatedZdnFn fn_zdn,
1974                                     FPMacroNaNPropagationOption nan_option) {
1975   ResolveFPNaNPropagationOption(&nan_option);
1976 
1977   if (zd.Aliases(za)) {
1978     // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
1979     SingleEmissionCheckScope guard(this);
1980     (this->*fn_zda)(zd, pg, zn, zm);
1981   } else if (zd.Aliases(zn)) {
1982     // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
1983     SingleEmissionCheckScope guard(this);
1984     (this->*fn_zdn)(zd, pg, zm, za);
1985   } else if (zd.Aliases(zm)) {
1986     switch (nan_option) {
1987       case FastNaNPropagation: {
1988         // We treat multiplication as commutative in the fast mode, so we can
1989         // swap zn and zm.
1990         // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
1991         SingleEmissionCheckScope guard(this);
1992         (this->*fn_zdn)(zd, pg, zn, za);
1993         return;
1994       }
1995       case StrictNaNPropagation: {
1996         UseScratchRegisterScope temps(this);
1997         // Use a scratch register to keep the argument order exactly as
1998         // specified.
1999         ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
2000         {
2001           MovprfxHelperScope guard(this, scratch, pg, za);
2002           // scratch = (-)za + ((-)zn * zm)
2003           (this->*fn_zda)(scratch, pg, zn, zm);
2004         }
2005         Mov(zd, scratch);
2006         return;
2007       }
2008       case NoFPMacroNaNPropagationSelected:
2009         VIXL_UNREACHABLE();
2010         return;
2011     }
2012   } else {
2013     // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
2014     MovprfxHelperScope guard(this, zd, pg, za);
2015     (this->*fn_zda)(zd, pg, zn, zm);
2016   }
2017 }
2018 
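// Illustrative sketch of the NaN-propagation handling above (`masm`, z0..z2
// and p0 are assumed names): with FastNaNPropagation,
//
//   masm.Fmla(z0.VnD(), p0.Merging(), z1.VnD(), z2.VnD(), z0.VnD(),
//             FastNaNPropagation);
//
// (the destination aliases zm) can be emitted as a single
// "fmad z0.d, p0/m, z2.d, z1.d" because the multiplication is treated as
// commutative; StrictNaNPropagation instead preserves the operand order by
// accumulating into a scratch register and copying the result back.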
2019 void MacroAssembler::Fmla(const ZRegister& zd,
2020                           const PRegisterM& pg,
2021                           const ZRegister& za,
2022                           const ZRegister& zn,
2023                           const ZRegister& zm,
2024                           FPMacroNaNPropagationOption nan_option) {
2025   VIXL_ASSERT(allow_macro_instructions_);
2026   FPMulAddHelper(zd,
2027                  pg,
2028                  za,
2029                  zn,
2030                  zm,
2031                  &Assembler::fmla,
2032                  &Assembler::fmad,
2033                  nan_option);
2034 }
2035 
2036 void MacroAssembler::Fmls(const ZRegister& zd,
2037                           const PRegisterM& pg,
2038                           const ZRegister& za,
2039                           const ZRegister& zn,
2040                           const ZRegister& zm,
2041                           FPMacroNaNPropagationOption nan_option) {
2042   VIXL_ASSERT(allow_macro_instructions_);
2043   FPMulAddHelper(zd,
2044                  pg,
2045                  za,
2046                  zn,
2047                  zm,
2048                  &Assembler::fmls,
2049                  &Assembler::fmsb,
2050                  nan_option);
2051 }
2052 
2053 void MacroAssembler::Fnmla(const ZRegister& zd,
2054                            const PRegisterM& pg,
2055                            const ZRegister& za,
2056                            const ZRegister& zn,
2057                            const ZRegister& zm,
2058                            FPMacroNaNPropagationOption nan_option) {
2059   VIXL_ASSERT(allow_macro_instructions_);
2060   FPMulAddHelper(zd,
2061                  pg,
2062                  za,
2063                  zn,
2064                  zm,
2065                  &Assembler::fnmla,
2066                  &Assembler::fnmad,
2067                  nan_option);
2068 }
2069 
2070 void MacroAssembler::Fnmls(const ZRegister& zd,
2071                            const PRegisterM& pg,
2072                            const ZRegister& za,
2073                            const ZRegister& zn,
2074                            const ZRegister& zm,
2075                            FPMacroNaNPropagationOption nan_option) {
2076   VIXL_ASSERT(allow_macro_instructions_);
2077   FPMulAddHelper(zd,
2078                  pg,
2079                  za,
2080                  zn,
2081                  zm,
2082                  &Assembler::fnmls,
2083                  &Assembler::fnmsb,
2084                  nan_option);
2085 }
2086 
2087 void MacroAssembler::Ftmad(const ZRegister& zd,
2088                            const ZRegister& zn,
2089                            const ZRegister& zm,
2090                            int imm3) {
2091   VIXL_ASSERT(allow_macro_instructions_);
2092   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2093     UseScratchRegisterScope temps(this);
2094     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
2095     Mov(scratch, zm);
2096     MovprfxHelperScope guard(this, zd, zn);
2097     ftmad(zd, zd, scratch, imm3);
2098   } else {
2099     MovprfxHelperScope guard(this, zd, zn);
2100     ftmad(zd, zd, zm, imm3);
2101   }
2102 }
2103 
2104 void MacroAssembler::Fcadd(const ZRegister& zd,
2105                            const PRegisterM& pg,
2106                            const ZRegister& zn,
2107                            const ZRegister& zm,
2108                            int rot) {
2109   VIXL_ASSERT(allow_macro_instructions_);
2110   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2111     UseScratchRegisterScope temps(this);
2112     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2113     {
2114       MovprfxHelperScope guard(this, scratch, pg, zn);
2115       fcadd(scratch, pg, scratch, zm, rot);
2116     }
2117     Mov(zd, scratch);
2118   } else {
2119     MovprfxHelperScope guard(this, zd, pg, zn);
2120     fcadd(zd, pg, zd, zm, rot);
2121   }
2122 }
2123 
2124 void MacroAssembler::Fcmla(const ZRegister& zd,
2125                            const PRegisterM& pg,
2126                            const ZRegister& za,
2127                            const ZRegister& zn,
2128                            const ZRegister& zm,
2129                            int rot) {
2130   VIXL_ASSERT(allow_macro_instructions_);
2131   if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
2132     UseScratchRegisterScope temps(this);
2133     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
2134     {
2135       MovprfxHelperScope guard(this, ztmp, za);
2136       fcmla(ztmp, pg, zn, zm, rot);
2137     }
2138     Mov(zd, pg, ztmp);
2139   } else {
2140     MovprfxHelperScope guard(this, zd, pg, za);
2141     fcmla(zd, pg, zn, zm, rot);
2142   }
2143 }
2144 
2145 void MacroAssembler::Splice(const ZRegister& zd,
2146                             const PRegister& pg,
2147                             const ZRegister& zn,
2148                             const ZRegister& zm) {
2149   VIXL_ASSERT(allow_macro_instructions_);
2150   if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
2151     SingleEmissionCheckScope guard(this);
2152     splice(zd, pg, zn, zm);
2153   } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2154     UseScratchRegisterScope temps(this);
2155     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2156     {
2157       MovprfxHelperScope guard(this, scratch, zn);
2158       splice(scratch, pg, scratch, zm);
2159     }
2160     Mov(zd, scratch);
2161   } else {
2162     MovprfxHelperScope guard(this, zd, zn);
2163     splice(zd, pg, zd, zm);
2164   }
2165 }
2166 
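// Illustrative sketch (`masm`, z0..z2 and p0 are assumed names): when SVE2 is
// available and the sources are consecutive, e.g.
//
//   masm.Splice(z0.VnB(), p0, z1.VnB(), z2.VnB());
//
// the constructive form is used directly; otherwise the helper falls back to
// "movprfx z0, z1; splice z0.b, p0, z0.b, z2.b", using a scratch register
// when the destination aliases zm.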
2167 void MacroAssembler::Clasta(const ZRegister& zd,
2168                             const PRegister& pg,
2169                             const ZRegister& zn,
2170                             const ZRegister& zm) {
2171   VIXL_ASSERT(allow_macro_instructions_);
2172   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2173     UseScratchRegisterScope temps(this);
2174     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2175     {
2176       MovprfxHelperScope guard(this, scratch, zn);
2177       clasta(scratch, pg, scratch, zm);
2178     }
2179     Mov(zd, scratch);
2180   } else {
2181     MovprfxHelperScope guard(this, zd, zn);
2182     clasta(zd, pg, zd, zm);
2183   }
2184 }
2185 
2186 void MacroAssembler::Clastb(const ZRegister& zd,
2187                             const PRegister& pg,
2188                             const ZRegister& zn,
2189                             const ZRegister& zm) {
2190   VIXL_ASSERT(allow_macro_instructions_);
2191   if (zd.Aliases(zm) && !zd.Aliases(zn)) {
2192     UseScratchRegisterScope temps(this);
2193     ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
2194     {
2195       MovprfxHelperScope guard(this, scratch, zn);
2196       clastb(scratch, pg, scratch, zm);
2197     }
2198     Mov(zd, scratch);
2199   } else {
2200     MovprfxHelperScope guard(this, zd, zn);
2201     clastb(zd, pg, zd, zm);
2202   }
2203 }
2204 
2205 void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
2206                                           const ZRegister& zd,
2207                                           const ZRegister& za,
2208                                           const ZRegister& zn,
2209                                           int shift) {
2210   VIXL_ASSERT(allow_macro_instructions_);
2211   if (!zd.Aliases(za) && zd.Aliases(zn)) {
2212     UseScratchRegisterScope temps(this);
2213     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
2214     Mov(ztmp, zn);
2215     {
2216       MovprfxHelperScope guard(this, zd, za);
2217       (this->*fn)(zd, ztmp, shift);
2218     }
2219   } else {
2220     MovprfxHelperScope guard(this, zd, za);
2221     (this->*fn)(zd, zn, shift);
2222   }
2223 }
2224 
2225 void MacroAssembler::Srsra(const ZRegister& zd,
2226                            const ZRegister& za,
2227                            const ZRegister& zn,
2228                            int shift) {
2229   ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
2230 }
2231 
2232 void MacroAssembler::Ssra(const ZRegister& zd,
2233                           const ZRegister& za,
2234                           const ZRegister& zn,
2235                           int shift) {
2236   ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
2237 }
2238 
2239 void MacroAssembler::Ursra(const ZRegister& zd,
2240                            const ZRegister& za,
2241                            const ZRegister& zn,
2242                            int shift) {
2243   ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
2244 }
2245 
2246 void MacroAssembler::Usra(const ZRegister& zd,
2247                           const ZRegister& za,
2248                           const ZRegister& zn,
2249                           int shift) {
2250   ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
2251 }
2252 
2253 void MacroAssembler::ComplexAddition(ZZZImmFn fn,
2254                                      const ZRegister& zd,
2255                                      const ZRegister& zn,
2256                                      const ZRegister& zm,
2257                                      int rot) {
2258   VIXL_ASSERT(allow_macro_instructions_);
2259   if (!zd.Aliases(zn) && zd.Aliases(zm)) {
2260     UseScratchRegisterScope temps(this);
2261     ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
2262     Mov(ztmp, zm);
2263     {
2264       MovprfxHelperScope guard(this, zd, zn);
2265       (this->*fn)(zd, zd, ztmp, rot);
2266     }
2267   } else {
2268     MovprfxHelperScope guard(this, zd, zn);
2269     (this->*fn)(zd, zd, zm, rot);
2270   }
2271 }
2272 
2273 void MacroAssembler::Cadd(const ZRegister& zd,
2274                           const ZRegister& zn,
2275                           const ZRegister& zm,
2276                           int rot) {
2277   ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
2278 }
2279 
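// Illustrative sketch (`masm`, z0..z2 are assumed names):
//
//   masm.Cadd(z0.VnH(), z1.VnH(), z2.VnH(), 90);
//
// becomes "movprfx z0, z1" followed by "cadd z0.h, z0.h, z2.h, #90"; if the
// destination had aliased zm instead, ComplexAddition would first copy zm to
// a scratch register so it survives the movprfx.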
2280 void MacroAssembler::Sqcadd(const ZRegister& zd,
2281                             const ZRegister& zn,
2282                             const ZRegister& zm,
2283                             int rot) {
2284   ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
2285 }
2286 
2287 }  // namespace aarch64
2288 }  // namespace vixl
2289