• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <cmath>
30 
31 #include "simulator-aarch64.h"
32 
33 namespace vixl {
34 namespace aarch64 {
35 
36 using vixl::internal::SimFloat16;
37 
// Compile-time type predicate: evaluates to true only for `double`.
// Every other type falls back to this primary template.
template <class T>
constexpr bool IsFloat64() {
  return false;
}
// `double` is the only type reported as a 64-bit float.
template <>
constexpr bool IsFloat64<double>() {
  return true;
}
46 
// Compile-time type predicate: evaluates to true only for `float`.
// Every other type falls back to this primary template.
template <class T>
constexpr bool IsFloat32() {
  return false;
}
// `float` is the only type reported as a 32-bit float.
template <>
constexpr bool IsFloat32<float>() {
  return true;
}
55 
56 template <typename T>
IsFloat16()57 constexpr bool IsFloat16() {
58   return false;
59 }
60 template <>
IsFloat16()61 constexpr bool IsFloat16<Float16>() {
62   return true;
63 }
64 template <>
IsFloat16()65 constexpr bool IsFloat16<SimFloat16>() {
66   return true;
67 }
68 
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71   return kFP64DefaultNaN;
72 }
73 
74 
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77   return kFP32DefaultNaN;
78 }
79 
80 
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83   return SimFloat16(kFP16DefaultNaN);
84 }
85 
86 
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88   if (src >= 0) {
89     return UFixedToDouble(src, fbits, round);
90   } else if (src == INT64_MIN) {
91     return -UFixedToDouble(src, fbits, round);
92   } else {
93     return -UFixedToDouble(-src, fbits, round);
94   }
95 }
96 
97 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99   // An input of 0 is a special case because the result is effectively
100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101   if (src == 0) {
102     return 0.0;
103   }
104 
105   // Calculate the exponent. The highest significant bit will have the value
106   // 2^exponent.
107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
108   const int64_t exponent = highest_significant_bit - fbits;
109 
110   return FPRoundToDouble(0, exponent, src, round);
111 }
112 
113 
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115   if (src >= 0) {
116     return UFixedToFloat(src, fbits, round);
117   } else if (src == INT64_MIN) {
118     return -UFixedToFloat(src, fbits, round);
119   } else {
120     return -UFixedToFloat(-src, fbits, round);
121   }
122 }
123 
124 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126   // An input of 0 is a special case because the result is effectively
127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128   if (src == 0) {
129     return 0.0f;
130   }
131 
132   // Calculate the exponent. The highest significant bit will have the value
133   // 2^exponent.
134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
135   const int32_t exponent = highest_significant_bit - fbits;
136 
137   return FPRoundToFloat(0, exponent, src, round);
138 }
139 
140 
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142   if (src >= 0) {
143     return UFixedToFloat16(src, fbits, round);
144   } else if (src == INT64_MIN) {
145     return -UFixedToFloat16(src, fbits, round);
146   } else {
147     return -UFixedToFloat16(-src, fbits, round);
148   }
149 }
150 
151 
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153                                       int fbits,
154                                       FPRounding round) {
155   // An input of 0 is a special case because the result is effectively
156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157   if (src == 0) {
158     return 0.0f;
159   }
160 
161   // Calculate the exponent. The highest significant bit will have the value
162   // 2^exponent.
163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
164   const int16_t exponent = highest_significant_bit - fbits;
165 
166   return FPRoundToFloat16(0, exponent, src, round);
167 }
168 
169 
GenerateRandomTag(uint16_t exclude)170 uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
171   // Generate a 4 bit integer from a 48bit random number
172   uint64_t rtag = rand_gen_() >> 44;
173   VIXL_ASSERT(IsUint4(rtag));
174 
175   if (exclude == 0) {
176     exclude = static_cast<uint16_t>(rand_gen_() >> 44);
177   }
178 
179   // TODO: implement this to better match the specification, which calls for a
180   // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
181   // PRNG.
182   return ChooseNonExcludedTag(rtag, 0, exclude);
183 }
184 
185 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)186 bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
187   dst.ClearForWrite(vform);
188   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
189     if (!LoadLane(dst, vform, i, addr)) {
190       return false;
191     }
192     addr += LaneSizeInBytesFromFormat(vform);
193   }
194   return true;
195 }
196 
197 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)198 bool Simulator::ld1(VectorFormat vform,
199                     LogicVRegister dst,
200                     int index,
201                     uint64_t addr) {
202   dst.ClearForWrite(vform);
203   return LoadLane(dst, vform, index, addr);
204 }
205 
206 
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)207 bool Simulator::ld1r(VectorFormat vform,
208                      VectorFormat unpack_vform,
209                      LogicVRegister dst,
210                      uint64_t addr,
211                      bool is_signed) {
212   unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
213   dst.ClearForWrite(vform);
214   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
215     if (is_signed) {
216       if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
217         return false;
218       }
219     } else {
220       if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
221         return false;
222       }
223     }
224   }
225   return true;
226 }
227 
228 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)229 bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
230   return ld1r(vform, vform, dst, addr);
231 }
232 
233 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)234 bool Simulator::ld2(VectorFormat vform,
235                     LogicVRegister dst1,
236                     LogicVRegister dst2,
237                     uint64_t addr1) {
238   dst1.ClearForWrite(vform);
239   dst2.ClearForWrite(vform);
240   int esize = LaneSizeInBytesFromFormat(vform);
241   uint64_t addr2 = addr1 + esize;
242   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
243     if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
244       return false;
245     }
246     addr1 += 2 * esize;
247     addr2 += 2 * esize;
248   }
249   return true;
250 }
251 
252 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)253 bool Simulator::ld2(VectorFormat vform,
254                     LogicVRegister dst1,
255                     LogicVRegister dst2,
256                     int index,
257                     uint64_t addr1) {
258   dst1.ClearForWrite(vform);
259   dst2.ClearForWrite(vform);
260   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
261   return (LoadLane(dst1, vform, index, addr1) &&
262           LoadLane(dst2, vform, index, addr2));
263 }
264 
265 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)266 bool Simulator::ld2r(VectorFormat vform,
267                      LogicVRegister dst1,
268                      LogicVRegister dst2,
269                      uint64_t addr) {
270   dst1.ClearForWrite(vform);
271   dst2.ClearForWrite(vform);
272   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
273   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
274     if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
275       return false;
276     }
277   }
278   return true;
279 }
280 
281 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)282 bool Simulator::ld3(VectorFormat vform,
283                     LogicVRegister dst1,
284                     LogicVRegister dst2,
285                     LogicVRegister dst3,
286                     uint64_t addr1) {
287   dst1.ClearForWrite(vform);
288   dst2.ClearForWrite(vform);
289   dst3.ClearForWrite(vform);
290   int esize = LaneSizeInBytesFromFormat(vform);
291   uint64_t addr2 = addr1 + esize;
292   uint64_t addr3 = addr2 + esize;
293   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
294     if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
295         !LoadLane(dst3, vform, i, addr3)) {
296       return false;
297     }
298     addr1 += 3 * esize;
299     addr2 += 3 * esize;
300     addr3 += 3 * esize;
301   }
302   return true;
303 }
304 
305 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)306 bool Simulator::ld3(VectorFormat vform,
307                     LogicVRegister dst1,
308                     LogicVRegister dst2,
309                     LogicVRegister dst3,
310                     int index,
311                     uint64_t addr1) {
312   dst1.ClearForWrite(vform);
313   dst2.ClearForWrite(vform);
314   dst3.ClearForWrite(vform);
315   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
316   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
317   return (LoadLane(dst1, vform, index, addr1) &&
318           LoadLane(dst2, vform, index, addr2) &&
319           LoadLane(dst3, vform, index, addr3));
320 }
321 
322 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)323 bool Simulator::ld3r(VectorFormat vform,
324                      LogicVRegister dst1,
325                      LogicVRegister dst2,
326                      LogicVRegister dst3,
327                      uint64_t addr) {
328   dst1.ClearForWrite(vform);
329   dst2.ClearForWrite(vform);
330   dst3.ClearForWrite(vform);
331   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
332   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
333   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
334     if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
335         !LoadLane(dst3, vform, i, addr3)) {
336       return false;
337     }
338   }
339   return true;
340 }
341 
342 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)343 bool Simulator::ld4(VectorFormat vform,
344                     LogicVRegister dst1,
345                     LogicVRegister dst2,
346                     LogicVRegister dst3,
347                     LogicVRegister dst4,
348                     uint64_t addr1) {
349   dst1.ClearForWrite(vform);
350   dst2.ClearForWrite(vform);
351   dst3.ClearForWrite(vform);
352   dst4.ClearForWrite(vform);
353   int esize = LaneSizeInBytesFromFormat(vform);
354   uint64_t addr2 = addr1 + esize;
355   uint64_t addr3 = addr2 + esize;
356   uint64_t addr4 = addr3 + esize;
357   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
358     if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
359         !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
360       return false;
361     }
362     addr1 += 4 * esize;
363     addr2 += 4 * esize;
364     addr3 += 4 * esize;
365     addr4 += 4 * esize;
366   }
367   return true;
368 }
369 
370 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)371 bool Simulator::ld4(VectorFormat vform,
372                     LogicVRegister dst1,
373                     LogicVRegister dst2,
374                     LogicVRegister dst3,
375                     LogicVRegister dst4,
376                     int index,
377                     uint64_t addr1) {
378   dst1.ClearForWrite(vform);
379   dst2.ClearForWrite(vform);
380   dst3.ClearForWrite(vform);
381   dst4.ClearForWrite(vform);
382   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
383   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
384   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
385   return (LoadLane(dst1, vform, index, addr1) &&
386           LoadLane(dst2, vform, index, addr2) &&
387           LoadLane(dst3, vform, index, addr3) &&
388           LoadLane(dst4, vform, index, addr4));
389 }
390 
391 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)392 bool Simulator::ld4r(VectorFormat vform,
393                      LogicVRegister dst1,
394                      LogicVRegister dst2,
395                      LogicVRegister dst3,
396                      LogicVRegister dst4,
397                      uint64_t addr) {
398   dst1.ClearForWrite(vform);
399   dst2.ClearForWrite(vform);
400   dst3.ClearForWrite(vform);
401   dst4.ClearForWrite(vform);
402   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
403   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
404   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
405   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
406     if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
407         !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
408       return false;
409     }
410   }
411   return true;
412 }
413 
414 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)415 bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
416   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
417     if (!StoreLane(src, vform, i, addr)) return false;
418     addr += LaneSizeInBytesFromFormat(vform);
419   }
420   return true;
421 }
422 
423 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)424 bool Simulator::st1(VectorFormat vform,
425                     LogicVRegister src,
426                     int index,
427                     uint64_t addr) {
428   return StoreLane(src, vform, index, addr);
429 }
430 
431 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)432 bool Simulator::st2(VectorFormat vform,
433                     LogicVRegister src,
434                     LogicVRegister src2,
435                     uint64_t addr) {
436   int esize = LaneSizeInBytesFromFormat(vform);
437   uint64_t addr2 = addr + esize;
438   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439     if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
440       return false;
441     }
442     addr += 2 * esize;
443     addr2 += 2 * esize;
444   }
445   return true;
446 }
447 
448 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)449 bool Simulator::st2(VectorFormat vform,
450                     LogicVRegister src,
451                     LogicVRegister src2,
452                     int index,
453                     uint64_t addr) {
454   int esize = LaneSizeInBytesFromFormat(vform);
455   return (StoreLane(src, vform, index, addr) &&
456           StoreLane(src2, vform, index, addr + 1 * esize));
457 }
458 
459 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)460 bool Simulator::st3(VectorFormat vform,
461                     LogicVRegister src,
462                     LogicVRegister src2,
463                     LogicVRegister src3,
464                     uint64_t addr) {
465   int esize = LaneSizeInBytesFromFormat(vform);
466   uint64_t addr2 = addr + esize;
467   uint64_t addr3 = addr2 + esize;
468   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
469     if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
470         !StoreLane(src3, vform, i, addr3)) {
471       return false;
472     }
473     addr += 3 * esize;
474     addr2 += 3 * esize;
475     addr3 += 3 * esize;
476   }
477   return true;
478 }
479 
480 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)481 bool Simulator::st3(VectorFormat vform,
482                     LogicVRegister src,
483                     LogicVRegister src2,
484                     LogicVRegister src3,
485                     int index,
486                     uint64_t addr) {
487   int esize = LaneSizeInBytesFromFormat(vform);
488   return (StoreLane(src, vform, index, addr) &&
489           StoreLane(src2, vform, index, addr + 1 * esize) &&
490           StoreLane(src3, vform, index, addr + 2 * esize));
491 }
492 
493 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)494 bool Simulator::st4(VectorFormat vform,
495                     LogicVRegister src,
496                     LogicVRegister src2,
497                     LogicVRegister src3,
498                     LogicVRegister src4,
499                     uint64_t addr) {
500   int esize = LaneSizeInBytesFromFormat(vform);
501   uint64_t addr2 = addr + esize;
502   uint64_t addr3 = addr2 + esize;
503   uint64_t addr4 = addr3 + esize;
504   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
505     if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
506         !StoreLane(src3, vform, i, addr3) ||
507         !StoreLane(src4, vform, i, addr4)) {
508       return false;
509     }
510     addr += 4 * esize;
511     addr2 += 4 * esize;
512     addr3 += 4 * esize;
513     addr4 += 4 * esize;
514   }
515   return true;
516 }
517 
518 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)519 bool Simulator::st4(VectorFormat vform,
520                     LogicVRegister src,
521                     LogicVRegister src2,
522                     LogicVRegister src3,
523                     LogicVRegister src4,
524                     int index,
525                     uint64_t addr) {
526   int esize = LaneSizeInBytesFromFormat(vform);
527   return (StoreLane(src, vform, index, addr) &&
528           StoreLane(src2, vform, index, addr + 1 * esize) &&
529           StoreLane(src3, vform, index, addr + 2 * esize) &&
530           StoreLane(src4, vform, index, addr + 3 * esize));
531 }
532 
533 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)534 LogicVRegister Simulator::cmp(VectorFormat vform,
535                               LogicVRegister dst,
536                               const LogicVRegister& src1,
537                               const LogicVRegister& src2,
538                               Condition cond) {
539   dst.ClearForWrite(vform);
540   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
541     int64_t sa = src1.Int(vform, i);
542     int64_t sb = src2.Int(vform, i);
543     uint64_t ua = src1.Uint(vform, i);
544     uint64_t ub = src2.Uint(vform, i);
545     bool result = false;
546     switch (cond) {
547       case eq:
548         result = (ua == ub);
549         break;
550       case ge:
551         result = (sa >= sb);
552         break;
553       case gt:
554         result = (sa > sb);
555         break;
556       case hi:
557         result = (ua > ub);
558         break;
559       case hs:
560         result = (ua >= ub);
561         break;
562       case lt:
563         result = (sa < sb);
564         break;
565       case le:
566         result = (sa <= sb);
567         break;
568       default:
569         VIXL_UNREACHABLE();
570         break;
571     }
572     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
573   }
574   return dst;
575 }
576 
577 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)578 LogicVRegister Simulator::cmp(VectorFormat vform,
579                               LogicVRegister dst,
580                               const LogicVRegister& src1,
581                               int imm,
582                               Condition cond) {
583   SimVRegister temp;
584   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
585   return cmp(vform, dst, src1, imm_reg, cond);
586 }
587 
588 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)589 LogicVRegister Simulator::cmptst(VectorFormat vform,
590                                  LogicVRegister dst,
591                                  const LogicVRegister& src1,
592                                  const LogicVRegister& src2) {
593   dst.ClearForWrite(vform);
594   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
595     uint64_t ua = src1.Uint(vform, i);
596     uint64_t ub = src2.Uint(vform, i);
597     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
598   }
599   return dst;
600 }
601 
602 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)603 LogicVRegister Simulator::add(VectorFormat vform,
604                               LogicVRegister dst,
605                               const LogicVRegister& src1,
606                               const LogicVRegister& src2) {
607   int lane_size = LaneSizeInBitsFromFormat(vform);
608   dst.ClearForWrite(vform);
609 
610   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
611     // Test for unsigned saturation.
612     uint64_t ua = src1.UintLeftJustified(vform, i);
613     uint64_t ub = src2.UintLeftJustified(vform, i);
614     uint64_t ur = ua + ub;
615     if (ur < ua) {
616       dst.SetUnsignedSat(i, true);
617     }
618 
619     // Test for signed saturation.
620     bool pos_a = (ua >> 63) == 0;
621     bool pos_b = (ub >> 63) == 0;
622     bool pos_r = (ur >> 63) == 0;
623     // If the signs of the operands are the same, but different from the result,
624     // there was an overflow.
625     if ((pos_a == pos_b) && (pos_a != pos_r)) {
626       dst.SetSignedSat(i, pos_a);
627     }
628     dst.SetInt(vform, i, ur >> (64 - lane_size));
629   }
630   return dst;
631 }
632 
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)633 LogicVRegister Simulator::add_uint(VectorFormat vform,
634                                    LogicVRegister dst,
635                                    const LogicVRegister& src1,
636                                    uint64_t value) {
637   int lane_size = LaneSizeInBitsFromFormat(vform);
638   VIXL_ASSERT(IsUintN(lane_size, value));
639   dst.ClearForWrite(vform);
640   // Left-justify `value`.
641   uint64_t ub = value << (64 - lane_size);
642   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
643     // Test for unsigned saturation.
644     uint64_t ua = src1.UintLeftJustified(vform, i);
645     uint64_t ur = ua + ub;
646     if (ur < ua) {
647       dst.SetUnsignedSat(i, true);
648     }
649 
650     // Test for signed saturation.
651     // `value` is always positive, so we have an overflow if the (signed) result
652     // is smaller than the first operand.
653     if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
654       dst.SetSignedSat(i, true);
655     }
656 
657     dst.SetInt(vform, i, ur >> (64 - lane_size));
658   }
659   return dst;
660 }
661 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)662 LogicVRegister Simulator::addp(VectorFormat vform,
663                                LogicVRegister dst,
664                                const LogicVRegister& src1,
665                                const LogicVRegister& src2) {
666   SimVRegister temp1, temp2;
667   uzp1(vform, temp1, src1, src2);
668   uzp2(vform, temp2, src1, src2);
669   add(vform, dst, temp1, temp2);
670   if (IsSVEFormat(vform)) {
671     interleave_top_bottom(vform, dst, dst);
672   }
673   return dst;
674 }
675 
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::sdiv(VectorFormat vform,
677                                LogicVRegister dst,
678                                const LogicVRegister& src1,
679                                const LogicVRegister& src2) {
680   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
681 
682   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
683     int64_t val1 = src1.Int(vform, i);
684     int64_t val2 = src2.Int(vform, i);
685     int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
686     int64_t quotient = 0;
687     if ((val1 == min_int) && (val2 == -1)) {
688       quotient = min_int;
689     } else if (val2 != 0) {
690       quotient = val1 / val2;
691     }
692     dst.SetInt(vform, i, quotient);
693   }
694 
695   return dst;
696 }
697 
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)698 LogicVRegister Simulator::udiv(VectorFormat vform,
699                                LogicVRegister dst,
700                                const LogicVRegister& src1,
701                                const LogicVRegister& src2) {
702   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
703 
704   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
705     uint64_t val1 = src1.Uint(vform, i);
706     uint64_t val2 = src2.Uint(vform, i);
707     uint64_t quotient = 0;
708     if (val2 != 0) {
709       quotient = val1 / val2;
710     }
711     dst.SetUint(vform, i, quotient);
712   }
713 
714   return dst;
715 }
716 
717 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)718 LogicVRegister Simulator::mla(VectorFormat vform,
719                               LogicVRegister dst,
720                               const LogicVRegister& srca,
721                               const LogicVRegister& src1,
722                               const LogicVRegister& src2) {
723   SimVRegister temp;
724   mul(vform, temp, src1, src2);
725   add(vform, dst, srca, temp);
726   return dst;
727 }
728 
729 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)730 LogicVRegister Simulator::mls(VectorFormat vform,
731                               LogicVRegister dst,
732                               const LogicVRegister& srca,
733                               const LogicVRegister& src1,
734                               const LogicVRegister& src2) {
735   SimVRegister temp;
736   mul(vform, temp, src1, src2);
737   sub(vform, dst, srca, temp);
738   return dst;
739 }
740 
741 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)742 LogicVRegister Simulator::mul(VectorFormat vform,
743                               LogicVRegister dst,
744                               const LogicVRegister& src1,
745                               const LogicVRegister& src2) {
746   dst.ClearForWrite(vform);
747 
748   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
749     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
750   }
751   return dst;
752 }
753 
754 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)755 LogicVRegister Simulator::mul(VectorFormat vform,
756                               LogicVRegister dst,
757                               const LogicVRegister& src1,
758                               const LogicVRegister& src2,
759                               int index) {
760   SimVRegister temp;
761   VectorFormat indexform = VectorFormatFillQ(vform);
762   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
763 }
764 
765 
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)766 LogicVRegister Simulator::smulh(VectorFormat vform,
767                                 LogicVRegister dst,
768                                 const LogicVRegister& src1,
769                                 const LogicVRegister& src2) {
770   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
771     int64_t dst_val = 0xbadbeef;
772     int64_t val1 = src1.Int(vform, i);
773     int64_t val2 = src2.Int(vform, i);
774     switch (LaneSizeInBitsFromFormat(vform)) {
775       case 8:
776         dst_val = internal::MultiplyHigh<8>(val1, val2);
777         break;
778       case 16:
779         dst_val = internal::MultiplyHigh<16>(val1, val2);
780         break;
781       case 32:
782         dst_val = internal::MultiplyHigh<32>(val1, val2);
783         break;
784       case 64:
785         dst_val = internal::MultiplyHigh<64>(val1, val2);
786         break;
787       default:
788         VIXL_UNREACHABLE();
789         break;
790     }
791     dst.SetInt(vform, i, dst_val);
792   }
793   return dst;
794 }
795 
796 
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)797 LogicVRegister Simulator::umulh(VectorFormat vform,
798                                 LogicVRegister dst,
799                                 const LogicVRegister& src1,
800                                 const LogicVRegister& src2) {
801   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
802     uint64_t dst_val = 0xbadbeef;
803     uint64_t val1 = src1.Uint(vform, i);
804     uint64_t val2 = src2.Uint(vform, i);
805     switch (LaneSizeInBitsFromFormat(vform)) {
806       case 8:
807         dst_val = internal::MultiplyHigh<8>(val1, val2);
808         break;
809       case 16:
810         dst_val = internal::MultiplyHigh<16>(val1, val2);
811         break;
812       case 32:
813         dst_val = internal::MultiplyHigh<32>(val1, val2);
814         break;
815       case 64:
816         dst_val = internal::MultiplyHigh<64>(val1, val2);
817         break;
818       default:
819         VIXL_UNREACHABLE();
820         break;
821     }
822     dst.SetUint(vform, i, dst_val);
823   }
824   return dst;
825 }
826 
827 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)828 LogicVRegister Simulator::mla(VectorFormat vform,
829                               LogicVRegister dst,
830                               const LogicVRegister& src1,
831                               const LogicVRegister& src2,
832                               int index) {
833   SimVRegister temp;
834   VectorFormat indexform = VectorFormatFillQ(vform);
835   return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
836 }
837 
838 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)839 LogicVRegister Simulator::mls(VectorFormat vform,
840                               LogicVRegister dst,
841                               const LogicVRegister& src1,
842                               const LogicVRegister& src2,
843                               int index) {
844   SimVRegister temp;
845   VectorFormat indexform = VectorFormatFillQ(vform);
846   return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
847 }
848 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)849 LogicVRegister Simulator::sqdmull(VectorFormat vform,
850                                   LogicVRegister dst,
851                                   const LogicVRegister& src1,
852                                   const LogicVRegister& src2,
853                                   int index) {
854   SimVRegister temp;
855   VectorFormat indexform =
856       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
857   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
858 }
859 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)860 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
861                                   LogicVRegister dst,
862                                   const LogicVRegister& src1,
863                                   const LogicVRegister& src2,
864                                   int index) {
865   SimVRegister temp;
866   VectorFormat indexform =
867       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
868   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
869 }
870 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)871 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
872                                   LogicVRegister dst,
873                                   const LogicVRegister& src1,
874                                   const LogicVRegister& src2,
875                                   int index) {
876   SimVRegister temp;
877   VectorFormat indexform =
878       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
879   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
880 }
881 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)882 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
883                                   LogicVRegister dst,
884                                   const LogicVRegister& src1,
885                                   const LogicVRegister& src2,
886                                   int index) {
887   SimVRegister temp;
888   VectorFormat indexform = VectorFormatFillQ(vform);
889   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
890 }
891 
892 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)893 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
894                                    LogicVRegister dst,
895                                    const LogicVRegister& src1,
896                                    const LogicVRegister& src2,
897                                    int index) {
898   SimVRegister temp;
899   VectorFormat indexform = VectorFormatFillQ(vform);
900   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
901 }
902 
903 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)904 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
905                                    LogicVRegister dst,
906                                    const LogicVRegister& src1,
907                                    const LogicVRegister& src2,
908                                    int index) {
909   SimVRegister temp;
910   VectorFormat indexform = VectorFormatFillQ(vform);
911   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913 
914 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
916                                    LogicVRegister dst,
917                                    const LogicVRegister& src1,
918                                    const LogicVRegister& src2,
919                                    int index) {
920   SimVRegister temp;
921   VectorFormat indexform = VectorFormatFillQ(vform);
922   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
923 }
924 
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const925 uint64_t Simulator::PolynomialMult(uint64_t op1,
926                                    uint64_t op2,
927                                    int lane_size_in_bits) const {
928   return PolynomialMult128(op1, op2, lane_size_in_bits).second;
929 }
930 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)931 LogicVRegister Simulator::pmul(VectorFormat vform,
932                                LogicVRegister dst,
933                                const LogicVRegister& src1,
934                                const LogicVRegister& src2) {
935   dst.ClearForWrite(vform);
936   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
937     dst.SetUint(vform,
938                 i,
939                 PolynomialMult(src1.Uint(vform, i),
940                                src2.Uint(vform, i),
941                                LaneSizeInBitsFromFormat(vform)));
942   }
943   return dst;
944 }
945 
946 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)947 LogicVRegister Simulator::pmull(VectorFormat vform,
948                                 LogicVRegister dst,
949                                 const LogicVRegister& src1,
950                                 const LogicVRegister& src2) {
951   dst.ClearForWrite(vform);
952   VectorFormat vform_src = VectorFormatHalfWidth(vform);
953 
954   // Process the elements in reverse to avoid problems when the destination
955   // register is the same as a source.
956   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
957     dst.SetUint(vform,
958                 i,
959                 PolynomialMult128(src1.Uint(vform_src, i),
960                                   src2.Uint(vform_src, i),
961                                   LaneSizeInBitsFromFormat(vform_src)));
962   }
963 
964   return dst;
965 }
966 
967 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)968 LogicVRegister Simulator::pmull2(VectorFormat vform,
969                                  LogicVRegister dst,
970                                  const LogicVRegister& src1,
971                                  const LogicVRegister& src2) {
972   dst.ClearForWrite(vform);
973   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
974 
975   int lane_count = LaneCountFromFormat(vform);
976   for (int i = 0; i < lane_count; i++) {
977     dst.SetUint(vform,
978                 i,
979                 PolynomialMult128(src1.Uint(vform_src, lane_count + i),
980                                   src2.Uint(vform_src, lane_count + i),
981                                   LaneSizeInBitsFromFormat(vform_src)));
982   }
983 
984   return dst;
985 }
986 
987 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)988 LogicVRegister Simulator::sub(VectorFormat vform,
989                               LogicVRegister dst,
990                               const LogicVRegister& src1,
991                               const LogicVRegister& src2) {
992   int lane_size = LaneSizeInBitsFromFormat(vform);
993   dst.ClearForWrite(vform);
994   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
995     // Test for unsigned saturation.
996     uint64_t ua = src1.UintLeftJustified(vform, i);
997     uint64_t ub = src2.UintLeftJustified(vform, i);
998     uint64_t ur = ua - ub;
999     if (ub > ua) {
1000       dst.SetUnsignedSat(i, false);
1001     }
1002 
1003     // Test for signed saturation.
1004     bool pos_a = (ua >> 63) == 0;
1005     bool pos_b = (ub >> 63) == 0;
1006     bool pos_r = (ur >> 63) == 0;
1007     // If the signs of the operands are different, and the sign of the first
1008     // operand doesn't match the result, there was an overflow.
1009     if ((pos_a != pos_b) && (pos_a != pos_r)) {
1010       dst.SetSignedSat(i, pos_a);
1011     }
1012 
1013     dst.SetInt(vform, i, ur >> (64 - lane_size));
1014   }
1015   return dst;
1016 }
1017 
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)1018 LogicVRegister Simulator::sub_uint(VectorFormat vform,
1019                                    LogicVRegister dst,
1020                                    const LogicVRegister& src1,
1021                                    uint64_t value) {
1022   int lane_size = LaneSizeInBitsFromFormat(vform);
1023   VIXL_ASSERT(IsUintN(lane_size, value));
1024   dst.ClearForWrite(vform);
1025   // Left-justify `value`.
1026   uint64_t ub = value << (64 - lane_size);
1027   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1028     // Test for unsigned saturation.
1029     uint64_t ua = src1.UintLeftJustified(vform, i);
1030     uint64_t ur = ua - ub;
1031     if (ub > ua) {
1032       dst.SetUnsignedSat(i, false);
1033     }
1034 
1035     // Test for signed saturation.
1036     // `value` is always positive, so we have an overflow if the (signed) result
1037     // is greater than the first operand.
1038     if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1039       dst.SetSignedSat(i, false);
1040     }
1041 
1042     dst.SetInt(vform, i, ur >> (64 - lane_size));
1043   }
1044   return dst;
1045 }
1046 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1047 LogicVRegister Simulator::and_(VectorFormat vform,
1048                                LogicVRegister dst,
1049                                const LogicVRegister& src1,
1050                                const LogicVRegister& src2) {
1051   dst.ClearForWrite(vform);
1052   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1053     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1054   }
1055   return dst;
1056 }
1057 
1058 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1059 LogicVRegister Simulator::orr(VectorFormat vform,
1060                               LogicVRegister dst,
1061                               const LogicVRegister& src1,
1062                               const LogicVRegister& src2) {
1063   dst.ClearForWrite(vform);
1064   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1065     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1066   }
1067   return dst;
1068 }
1069 
1070 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1071 LogicVRegister Simulator::orn(VectorFormat vform,
1072                               LogicVRegister dst,
1073                               const LogicVRegister& src1,
1074                               const LogicVRegister& src2) {
1075   dst.ClearForWrite(vform);
1076   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1077     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1078   }
1079   return dst;
1080 }
1081 
1082 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1083 LogicVRegister Simulator::eor(VectorFormat vform,
1084                               LogicVRegister dst,
1085                               const LogicVRegister& src1,
1086                               const LogicVRegister& src2) {
1087   dst.ClearForWrite(vform);
1088   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1089     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1090   }
1091   return dst;
1092 }
1093 
1094 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1095 LogicVRegister Simulator::bic(VectorFormat vform,
1096                               LogicVRegister dst,
1097                               const LogicVRegister& src1,
1098                               const LogicVRegister& src2) {
1099   dst.ClearForWrite(vform);
1100   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1101     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1102   }
1103   return dst;
1104 }
1105 
1106 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1107 LogicVRegister Simulator::bic(VectorFormat vform,
1108                               LogicVRegister dst,
1109                               const LogicVRegister& src,
1110                               uint64_t imm) {
1111   uint64_t result[16];
1112   int lane_count = LaneCountFromFormat(vform);
1113   for (int i = 0; i < lane_count; ++i) {
1114     result[i] = src.Uint(vform, i) & ~imm;
1115   }
1116   dst.ClearForWrite(vform);
1117   for (int i = 0; i < lane_count; ++i) {
1118     dst.SetUint(vform, i, result[i]);
1119   }
1120   return dst;
1121 }
1122 
1123 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1124 LogicVRegister Simulator::bif(VectorFormat vform,
1125                               LogicVRegister dst,
1126                               const LogicVRegister& src1,
1127                               const LogicVRegister& src2) {
1128   dst.ClearForWrite(vform);
1129   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1130     uint64_t operand1 = dst.Uint(vform, i);
1131     uint64_t operand2 = ~src2.Uint(vform, i);
1132     uint64_t operand3 = src1.Uint(vform, i);
1133     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1134     dst.SetUint(vform, i, result);
1135   }
1136   return dst;
1137 }
1138 
1139 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1140 LogicVRegister Simulator::bit(VectorFormat vform,
1141                               LogicVRegister dst,
1142                               const LogicVRegister& src1,
1143                               const LogicVRegister& src2) {
1144   dst.ClearForWrite(vform);
1145   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1146     uint64_t operand1 = dst.Uint(vform, i);
1147     uint64_t operand2 = src2.Uint(vform, i);
1148     uint64_t operand3 = src1.Uint(vform, i);
1149     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1150     dst.SetUint(vform, i, result);
1151   }
1152   return dst;
1153 }
1154 
1155 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1156 LogicVRegister Simulator::bsl(VectorFormat vform,
1157                               LogicVRegister dst,
1158                               const LogicVRegister& src_mask,
1159                               const LogicVRegister& src1,
1160                               const LogicVRegister& src2) {
1161   dst.ClearForWrite(vform);
1162   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1163     uint64_t operand1 = src2.Uint(vform, i);
1164     uint64_t operand2 = src_mask.Uint(vform, i);
1165     uint64_t operand3 = src1.Uint(vform, i);
1166     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1167     dst.SetUint(vform, i, result);
1168   }
1169   return dst;
1170 }
1171 
1172 
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1173 LogicVRegister Simulator::sminmax(VectorFormat vform,
1174                                   LogicVRegister dst,
1175                                   const LogicVRegister& src1,
1176                                   const LogicVRegister& src2,
1177                                   bool max) {
1178   dst.ClearForWrite(vform);
1179   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1180     int64_t src1_val = src1.Int(vform, i);
1181     int64_t src2_val = src2.Int(vform, i);
1182     int64_t dst_val;
1183     if (max) {
1184       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1185     } else {
1186       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1187     }
1188     dst.SetInt(vform, i, dst_val);
1189   }
1190   return dst;
1191 }
1192 
1193 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1194 LogicVRegister Simulator::smax(VectorFormat vform,
1195                                LogicVRegister dst,
1196                                const LogicVRegister& src1,
1197                                const LogicVRegister& src2) {
1198   return sminmax(vform, dst, src1, src2, true);
1199 }
1200 
1201 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1202 LogicVRegister Simulator::smin(VectorFormat vform,
1203                                LogicVRegister dst,
1204                                const LogicVRegister& src1,
1205                                const LogicVRegister& src2) {
1206   return sminmax(vform, dst, src1, src2, false);
1207 }
1208 
1209 
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1210 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1211                                    LogicVRegister dst,
1212                                    const LogicVRegister& src1,
1213                                    const LogicVRegister& src2,
1214                                    bool max) {
1215   unsigned lanes = LaneCountFromFormat(vform);
1216   int64_t result[kZRegMaxSizeInBytes];
1217   const LogicVRegister* src = &src1;
1218   for (unsigned j = 0; j < 2; j++) {
1219     for (unsigned i = 0; i < lanes; i += 2) {
1220       int64_t first_val = src->Int(vform, i);
1221       int64_t second_val = src->Int(vform, i + 1);
1222       int64_t dst_val;
1223       if (max) {
1224         dst_val = (first_val > second_val) ? first_val : second_val;
1225       } else {
1226         dst_val = (first_val < second_val) ? first_val : second_val;
1227       }
1228       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1229       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1230     }
1231     src = &src2;
1232   }
1233   dst.SetIntArray(vform, result);
1234   if (IsSVEFormat(vform)) {
1235     interleave_top_bottom(vform, dst, dst);
1236   }
1237   return dst;
1238 }
1239 
1240 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1241 LogicVRegister Simulator::smaxp(VectorFormat vform,
1242                                 LogicVRegister dst,
1243                                 const LogicVRegister& src1,
1244                                 const LogicVRegister& src2) {
1245   return sminmaxp(vform, dst, src1, src2, true);
1246 }
1247 
1248 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1249 LogicVRegister Simulator::sminp(VectorFormat vform,
1250                                 LogicVRegister dst,
1251                                 const LogicVRegister& src1,
1252                                 const LogicVRegister& src2) {
1253   return sminmaxp(vform, dst, src1, src2, false);
1254 }
1255 
1256 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1257 LogicVRegister Simulator::addp(VectorFormat vform,
1258                                LogicVRegister dst,
1259                                const LogicVRegister& src) {
1260   VIXL_ASSERT(vform == kFormatD);
1261 
1262   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1263   dst.ClearForWrite(vform);
1264   dst.SetUint(vform, 0, dst_val);
1265   return dst;
1266 }
1267 
1268 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::addv(VectorFormat vform,
1270                                LogicVRegister dst,
1271                                const LogicVRegister& src) {
1272   VectorFormat vform_dst =
1273       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1274 
1275 
1276   int64_t dst_val = 0;
1277   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1278     dst_val += src.Int(vform, i);
1279   }
1280 
1281   dst.ClearForWrite(vform_dst);
1282   dst.SetInt(vform_dst, 0, dst_val);
1283   return dst;
1284 }
1285 
1286 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1287 LogicVRegister Simulator::saddlv(VectorFormat vform,
1288                                  LogicVRegister dst,
1289                                  const LogicVRegister& src) {
1290   VectorFormat vform_dst =
1291       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1292 
1293   int64_t dst_val = 0;
1294   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1295     dst_val += src.Int(vform, i);
1296   }
1297 
1298   dst.ClearForWrite(vform_dst);
1299   dst.SetInt(vform_dst, 0, dst_val);
1300   return dst;
1301 }
1302 
1303 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1304 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1305                                  LogicVRegister dst,
1306                                  const LogicVRegister& src) {
1307   VectorFormat vform_dst =
1308       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1309 
1310   uint64_t dst_val = 0;
1311   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1312     dst_val += src.Uint(vform, i);
1313   }
1314 
1315   dst.ClearForWrite(vform_dst);
1316   dst.SetUint(vform_dst, 0, dst_val);
1317   return dst;
1318 }
1319 
1320 
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1321 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1322                                    LogicVRegister dst,
1323                                    const LogicPRegister& pg,
1324                                    const LogicVRegister& src,
1325                                    bool max) {
1326   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1327   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1328     if (!pg.IsActive(vform, i)) continue;
1329 
1330     int64_t src_val = src.Int(vform, i);
1331     if (max) {
1332       dst_val = (src_val > dst_val) ? src_val : dst_val;
1333     } else {
1334       dst_val = (src_val < dst_val) ? src_val : dst_val;
1335     }
1336   }
1337   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1338   dst.SetInt(vform, 0, dst_val);
1339   return dst;
1340 }
1341 
1342 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1343 LogicVRegister Simulator::smaxv(VectorFormat vform,
1344                                 LogicVRegister dst,
1345                                 const LogicVRegister& src) {
1346   sminmaxv(vform, dst, GetPTrue(), src, true);
1347   return dst;
1348 }
1349 
1350 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1351 LogicVRegister Simulator::sminv(VectorFormat vform,
1352                                 LogicVRegister dst,
1353                                 const LogicVRegister& src) {
1354   sminmaxv(vform, dst, GetPTrue(), src, false);
1355   return dst;
1356 }
1357 
1358 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1359 LogicVRegister Simulator::smaxv(VectorFormat vform,
1360                                 LogicVRegister dst,
1361                                 const LogicPRegister& pg,
1362                                 const LogicVRegister& src) {
1363   VIXL_ASSERT(IsSVEFormat(vform));
1364   sminmaxv(vform, dst, pg, src, true);
1365   return dst;
1366 }
1367 
1368 
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1369 LogicVRegister Simulator::sminv(VectorFormat vform,
1370                                 LogicVRegister dst,
1371                                 const LogicPRegister& pg,
1372                                 const LogicVRegister& src) {
1373   VIXL_ASSERT(IsSVEFormat(vform));
1374   sminmaxv(vform, dst, pg, src, false);
1375   return dst;
1376 }
1377 
1378 
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1379 LogicVRegister Simulator::uminmax(VectorFormat vform,
1380                                   LogicVRegister dst,
1381                                   const LogicVRegister& src1,
1382                                   const LogicVRegister& src2,
1383                                   bool max) {
1384   dst.ClearForWrite(vform);
1385   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1386     uint64_t src1_val = src1.Uint(vform, i);
1387     uint64_t src2_val = src2.Uint(vform, i);
1388     uint64_t dst_val;
1389     if (max) {
1390       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1391     } else {
1392       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1393     }
1394     dst.SetUint(vform, i, dst_val);
1395   }
1396   return dst;
1397 }
1398 
1399 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1400 LogicVRegister Simulator::umax(VectorFormat vform,
1401                                LogicVRegister dst,
1402                                const LogicVRegister& src1,
1403                                const LogicVRegister& src2) {
1404   return uminmax(vform, dst, src1, src2, true);
1405 }
1406 
1407 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1408 LogicVRegister Simulator::umin(VectorFormat vform,
1409                                LogicVRegister dst,
1410                                const LogicVRegister& src1,
1411                                const LogicVRegister& src2) {
1412   return uminmax(vform, dst, src1, src2, false);
1413 }
1414 
1415 
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1416 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1417                                    LogicVRegister dst,
1418                                    const LogicVRegister& src1,
1419                                    const LogicVRegister& src2,
1420                                    bool max) {
1421   unsigned lanes = LaneCountFromFormat(vform);
1422   uint64_t result[kZRegMaxSizeInBytes];
1423   const LogicVRegister* src = &src1;
1424   for (unsigned j = 0; j < 2; j++) {
1425     for (unsigned i = 0; i < lanes; i += 2) {
1426       uint64_t first_val = src->Uint(vform, i);
1427       uint64_t second_val = src->Uint(vform, i + 1);
1428       uint64_t dst_val;
1429       if (max) {
1430         dst_val = (first_val > second_val) ? first_val : second_val;
1431       } else {
1432         dst_val = (first_val < second_val) ? first_val : second_val;
1433       }
1434       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1435       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1436     }
1437     src = &src2;
1438   }
1439   dst.SetUintArray(vform, result);
1440   if (IsSVEFormat(vform)) {
1441     interleave_top_bottom(vform, dst, dst);
1442   }
1443   return dst;
1444 }
1445 
1446 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1447 LogicVRegister Simulator::umaxp(VectorFormat vform,
1448                                 LogicVRegister dst,
1449                                 const LogicVRegister& src1,
1450                                 const LogicVRegister& src2) {
1451   return uminmaxp(vform, dst, src1, src2, true);
1452 }
1453 
1454 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1455 LogicVRegister Simulator::uminp(VectorFormat vform,
1456                                 LogicVRegister dst,
1457                                 const LogicVRegister& src1,
1458                                 const LogicVRegister& src2) {
1459   return uminmaxp(vform, dst, src1, src2, false);
1460 }
1461 
1462 
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1463 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1464                                    LogicVRegister dst,
1465                                    const LogicPRegister& pg,
1466                                    const LogicVRegister& src,
1467                                    bool max) {
1468   uint64_t dst_val = max ? 0 : UINT64_MAX;
1469   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1470     if (!pg.IsActive(vform, i)) continue;
1471 
1472     uint64_t src_val = src.Uint(vform, i);
1473     if (max) {
1474       dst_val = (src_val > dst_val) ? src_val : dst_val;
1475     } else {
1476       dst_val = (src_val < dst_val) ? src_val : dst_val;
1477     }
1478   }
1479   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1480   dst.SetUint(vform, 0, dst_val);
1481   return dst;
1482 }
1483 
1484 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1485 LogicVRegister Simulator::umaxv(VectorFormat vform,
1486                                 LogicVRegister dst,
1487                                 const LogicVRegister& src) {
1488   uminmaxv(vform, dst, GetPTrue(), src, true);
1489   return dst;
1490 }
1491 
1492 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1493 LogicVRegister Simulator::uminv(VectorFormat vform,
1494                                 LogicVRegister dst,
1495                                 const LogicVRegister& src) {
1496   uminmaxv(vform, dst, GetPTrue(), src, false);
1497   return dst;
1498 }
1499 
1500 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1501 LogicVRegister Simulator::umaxv(VectorFormat vform,
1502                                 LogicVRegister dst,
1503                                 const LogicPRegister& pg,
1504                                 const LogicVRegister& src) {
1505   VIXL_ASSERT(IsSVEFormat(vform));
1506   uminmaxv(vform, dst, pg, src, true);
1507   return dst;
1508 }
1509 
1510 
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1511 LogicVRegister Simulator::uminv(VectorFormat vform,
1512                                 LogicVRegister dst,
1513                                 const LogicPRegister& pg,
1514                                 const LogicVRegister& src) {
1515   VIXL_ASSERT(IsSVEFormat(vform));
1516   uminmaxv(vform, dst, pg, src, false);
1517   return dst;
1518 }
1519 
1520 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1521 LogicVRegister Simulator::shl(VectorFormat vform,
1522                               LogicVRegister dst,
1523                               const LogicVRegister& src,
1524                               int shift) {
1525   VIXL_ASSERT(shift >= 0);
1526   SimVRegister temp;
1527   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1528   return ushl(vform, dst, src, shiftreg);
1529 }
1530 
1531 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1532 LogicVRegister Simulator::sshll(VectorFormat vform,
1533                                 LogicVRegister dst,
1534                                 const LogicVRegister& src,
1535                                 int shift) {
1536   VIXL_ASSERT(shift >= 0);
1537   SimVRegister temp1, temp2;
1538   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1539   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1540   return sshl(vform, dst, extendedreg, shiftreg);
1541 }
1542 
1543 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1544 LogicVRegister Simulator::sshll2(VectorFormat vform,
1545                                  LogicVRegister dst,
1546                                  const LogicVRegister& src,
1547                                  int shift) {
1548   VIXL_ASSERT(shift >= 0);
1549   SimVRegister temp1, temp2;
1550   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1551   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1552   return sshl(vform, dst, extendedreg, shiftreg);
1553 }
1554 
1555 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1556 LogicVRegister Simulator::shll(VectorFormat vform,
1557                                LogicVRegister dst,
1558                                const LogicVRegister& src) {
1559   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1560   return sshll(vform, dst, src, shift);
1561 }
1562 
1563 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1564 LogicVRegister Simulator::shll2(VectorFormat vform,
1565                                 LogicVRegister dst,
1566                                 const LogicVRegister& src) {
1567   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1568   return sshll2(vform, dst, src, shift);
1569 }
1570 
1571 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1572 LogicVRegister Simulator::ushll(VectorFormat vform,
1573                                 LogicVRegister dst,
1574                                 const LogicVRegister& src,
1575                                 int shift) {
1576   VIXL_ASSERT(shift >= 0);
1577   SimVRegister temp1, temp2;
1578   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1579   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1580   return ushl(vform, dst, extendedreg, shiftreg);
1581 }
1582 
1583 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1584 LogicVRegister Simulator::ushll2(VectorFormat vform,
1585                                  LogicVRegister dst,
1586                                  const LogicVRegister& src,
1587                                  int shift) {
1588   VIXL_ASSERT(shift >= 0);
1589   SimVRegister temp1, temp2;
1590   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1591   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1592   return ushl(vform, dst, extendedreg, shiftreg);
1593 }
1594 
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1595 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1596                                            const LogicPRegister& pg,
1597                                            const LogicVRegister& src,
1598                                            int offset_from_last_active) {
1599   // Untested for any other values.
1600   VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1601 
1602   int last_active = GetLastActive(vform, pg);
1603   int lane_count = LaneCountFromFormat(vform);
1604   int index =
1605       ((last_active + offset_from_last_active) + lane_count) % lane_count;
1606   return std::make_pair(last_active >= 0, src.Uint(vform, index));
1607 }
1608 
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1609 LogicVRegister Simulator::compact(VectorFormat vform,
1610                                   LogicVRegister dst,
1611                                   const LogicPRegister& pg,
1612                                   const LogicVRegister& src) {
1613   int j = 0;
1614   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1615     if (pg.IsActive(vform, i)) {
1616       dst.SetUint(vform, j++, src.Uint(vform, i));
1617     }
1618   }
1619   for (; j < LaneCountFromFormat(vform); j++) {
1620     dst.SetUint(vform, j, 0);
1621   }
1622   return dst;
1623 }
1624 
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1625 LogicVRegister Simulator::splice(VectorFormat vform,
1626                                  LogicVRegister dst,
1627                                  const LogicPRegister& pg,
1628                                  const LogicVRegister& src1,
1629                                  const LogicVRegister& src2) {
1630   int lane_count = LaneCountFromFormat(vform);
1631   int first_active = GetFirstActive(vform, pg);
1632   int last_active = GetLastActive(vform, pg);
1633   int dst_idx = 0;
1634   uint64_t result[kZRegMaxSizeInBytes];
1635 
1636   if (first_active >= 0) {
1637     VIXL_ASSERT(last_active >= first_active);
1638     VIXL_ASSERT(last_active < lane_count);
1639     for (int i = first_active; i <= last_active; i++) {
1640       result[dst_idx++] = src1.Uint(vform, i);
1641     }
1642   }
1643 
1644   VIXL_ASSERT(dst_idx <= lane_count);
1645   for (int i = dst_idx; i < lane_count; i++) {
1646     result[i] = src2.Uint(vform, i - dst_idx);
1647   }
1648 
1649   dst.SetUintArray(vform, result);
1650 
1651   return dst;
1652 }
1653 
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1654 LogicVRegister Simulator::sel(VectorFormat vform,
1655                               LogicVRegister dst,
1656                               const SimPRegister& pg,
1657                               const LogicVRegister& src1,
1658                               const LogicVRegister& src2) {
1659   int p_reg_bits_per_lane =
1660       LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1661   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1662     uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1663                               ? src1.Uint(vform, lane)
1664                               : src2.Uint(vform, lane);
1665     dst.SetUint(vform, lane, lane_value);
1666   }
1667   return dst;
1668 }
1669 
1670 
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1671 LogicPRegister Simulator::sel(LogicPRegister dst,
1672                               const LogicPRegister& pg,
1673                               const LogicPRegister& src1,
1674                               const LogicPRegister& src2) {
1675   for (int i = 0; i < dst.GetChunkCount(); i++) {
1676     LogicPRegister::ChunkType mask = pg.GetChunk(i);
1677     LogicPRegister::ChunkType result =
1678         (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1679     dst.SetChunk(i, result);
1680   }
1681   return dst;
1682 }
1683 
1684 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1685 LogicVRegister Simulator::sli(VectorFormat vform,
1686                               LogicVRegister dst,
1687                               const LogicVRegister& src,
1688                               int shift) {
1689   dst.ClearForWrite(vform);
1690   int lane_count = LaneCountFromFormat(vform);
1691   for (int i = 0; i < lane_count; i++) {
1692     uint64_t src_lane = src.Uint(vform, i);
1693     uint64_t dst_lane = dst.Uint(vform, i);
1694     uint64_t shifted = src_lane << shift;
1695     uint64_t mask = MaxUintFromFormat(vform) << shift;
1696     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1697   }
1698   return dst;
1699 }
1700 
1701 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1702 LogicVRegister Simulator::sqshl(VectorFormat vform,
1703                                 LogicVRegister dst,
1704                                 const LogicVRegister& src,
1705                                 int shift) {
1706   VIXL_ASSERT(shift >= 0);
1707   SimVRegister temp;
1708   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1709   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1710 }
1711 
1712 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1713 LogicVRegister Simulator::uqshl(VectorFormat vform,
1714                                 LogicVRegister dst,
1715                                 const LogicVRegister& src,
1716                                 int shift) {
1717   VIXL_ASSERT(shift >= 0);
1718   SimVRegister temp;
1719   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1720   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1721 }
1722 
1723 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1724 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1725                                  LogicVRegister dst,
1726                                  const LogicVRegister& src,
1727                                  int shift) {
1728   VIXL_ASSERT(shift >= 0);
1729   SimVRegister temp;
1730   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1731   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1732 }
1733 
1734 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1735 LogicVRegister Simulator::sri(VectorFormat vform,
1736                               LogicVRegister dst,
1737                               const LogicVRegister& src,
1738                               int shift) {
1739   dst.ClearForWrite(vform);
1740   int lane_count = LaneCountFromFormat(vform);
1741   VIXL_ASSERT((shift > 0) &&
1742               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1743   for (int i = 0; i < lane_count; i++) {
1744     uint64_t src_lane = src.Uint(vform, i);
1745     uint64_t dst_lane = dst.Uint(vform, i);
1746     uint64_t shifted;
1747     uint64_t mask;
1748     if (shift == 64) {
1749       shifted = 0;
1750       mask = 0;
1751     } else {
1752       shifted = src_lane >> shift;
1753       mask = MaxUintFromFormat(vform) >> shift;
1754     }
1755     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1756   }
1757   return dst;
1758 }
1759 
1760 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1761 LogicVRegister Simulator::ushr(VectorFormat vform,
1762                                LogicVRegister dst,
1763                                const LogicVRegister& src,
1764                                int shift) {
1765   VIXL_ASSERT(shift >= 0);
1766   SimVRegister temp;
1767   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1768   return ushl(vform, dst, src, shiftreg);
1769 }
1770 
1771 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1772 LogicVRegister Simulator::sshr(VectorFormat vform,
1773                                LogicVRegister dst,
1774                                const LogicVRegister& src,
1775                                int shift) {
1776   VIXL_ASSERT(shift >= 0);
1777   SimVRegister temp;
1778   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1779   return sshl(vform, dst, src, shiftreg);
1780 }
1781 
1782 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1783 LogicVRegister Simulator::ssra(VectorFormat vform,
1784                                LogicVRegister dst,
1785                                const LogicVRegister& src,
1786                                int shift) {
1787   SimVRegister temp;
1788   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1789   return add(vform, dst, dst, shifted_reg);
1790 }
1791 
1792 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1793 LogicVRegister Simulator::usra(VectorFormat vform,
1794                                LogicVRegister dst,
1795                                const LogicVRegister& src,
1796                                int shift) {
1797   SimVRegister temp;
1798   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1799   return add(vform, dst, dst, shifted_reg);
1800 }
1801 
1802 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1803 LogicVRegister Simulator::srsra(VectorFormat vform,
1804                                 LogicVRegister dst,
1805                                 const LogicVRegister& src,
1806                                 int shift) {
1807   SimVRegister temp;
1808   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1809   return add(vform, dst, dst, shifted_reg);
1810 }
1811 
1812 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1813 LogicVRegister Simulator::ursra(VectorFormat vform,
1814                                 LogicVRegister dst,
1815                                 const LogicVRegister& src,
1816                                 int shift) {
1817   SimVRegister temp;
1818   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1819   return add(vform, dst, dst, shifted_reg);
1820 }
1821 
1822 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1823 LogicVRegister Simulator::cls(VectorFormat vform,
1824                               LogicVRegister dst,
1825                               const LogicVRegister& src) {
1826   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1827   int lane_count = LaneCountFromFormat(vform);
1828 
1829   // Ensure that we can store one result per lane.
1830   int result[kZRegMaxSizeInBytes];
1831 
1832   for (int i = 0; i < lane_count; i++) {
1833     result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1834   }
1835 
1836   dst.ClearForWrite(vform);
1837   for (int i = 0; i < lane_count; ++i) {
1838     dst.SetUint(vform, i, result[i]);
1839   }
1840   return dst;
1841 }
1842 
1843 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1844 LogicVRegister Simulator::clz(VectorFormat vform,
1845                               LogicVRegister dst,
1846                               const LogicVRegister& src) {
1847   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1848   int lane_count = LaneCountFromFormat(vform);
1849 
1850   // Ensure that we can store one result per lane.
1851   int result[kZRegMaxSizeInBytes];
1852 
1853   for (int i = 0; i < lane_count; i++) {
1854     result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1855   }
1856 
1857   dst.ClearForWrite(vform);
1858   for (int i = 0; i < lane_count; ++i) {
1859     dst.SetUint(vform, i, result[i]);
1860   }
1861   return dst;
1862 }
1863 
1864 
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1865 LogicVRegister Simulator::cnot(VectorFormat vform,
1866                                LogicVRegister dst,
1867                                const LogicVRegister& src) {
1868   dst.ClearForWrite(vform);
1869   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1870     uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1871     dst.SetUint(vform, i, value);
1872   }
1873   return dst;
1874 }
1875 
1876 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1877 LogicVRegister Simulator::cnt(VectorFormat vform,
1878                               LogicVRegister dst,
1879                               const LogicVRegister& src) {
1880   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1881   int lane_count = LaneCountFromFormat(vform);
1882 
1883   // Ensure that we can store one result per lane.
1884   int result[kZRegMaxSizeInBytes];
1885 
1886   for (int i = 0; i < lane_count; i++) {
1887     result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1888   }
1889 
1890   dst.ClearForWrite(vform);
1891   for (int i = 0; i < lane_count; ++i) {
1892     dst.SetUint(vform, i, result[i]);
1893   }
1894   return dst;
1895 }
1896 
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1897 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1898                                             int esize,
1899                                             bool shift_in_ls_byte) {
1900   if (shift_in_ls_byte) {
1901     // Neon uses the least-significant byte of the lane as the shift distance.
1902     shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1903   } else {
1904     // SVE uses a saturated shift distance in the range
1905     //  -(esize + 1) ... (esize + 1).
1906     if (shift_val > (esize + 1)) shift_val = esize + 1;
1907     if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1908   }
1909   return shift_val;
1910 }
1911 
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1912 LogicVRegister Simulator::sshl(VectorFormat vform,
1913                                LogicVRegister dst,
1914                                const LogicVRegister& src1,
1915                                const LogicVRegister& src2,
1916                                bool shift_in_ls_byte) {
1917   dst.ClearForWrite(vform);
1918   int esize = LaneSizeInBitsFromFormat(vform);
1919   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1920     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1921                                                      esize,
1922                                                      shift_in_ls_byte);
1923 
1924     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1925 
1926     // Set signed saturation state.
1927     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1928       dst.SetSignedSat(i, lj_src_val >= 0);
1929     }
1930 
1931     // Set unsigned saturation state.
1932     if (lj_src_val < 0) {
1933       dst.SetUnsignedSat(i, false);
1934     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1935                (lj_src_val != 0)) {
1936       dst.SetUnsignedSat(i, true);
1937     }
1938 
1939     int64_t src_val = src1.Int(vform, i);
1940     bool src_is_negative = src_val < 0;
1941     if (shift_val > 63) {
1942       dst.SetInt(vform, i, 0);
1943     } else if (shift_val < -63) {
1944       dst.SetRounding(i, src_is_negative);
1945       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1946     } else {
1947       // Use unsigned types for shifts, as behaviour is undefined for signed
1948       // lhs.
1949       uint64_t usrc_val = static_cast<uint64_t>(src_val);
1950 
1951       if (shift_val < 0) {
1952         // Convert to right shift.
1953         shift_val = -shift_val;
1954 
1955         // Set rounding state by testing most-significant bit shifted out.
1956         // Rounding only needed on right shifts.
1957         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1958           dst.SetRounding(i, true);
1959         }
1960 
1961         usrc_val >>= shift_val;
1962 
1963         if (src_is_negative) {
1964           // Simulate sign-extension.
1965           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1966         }
1967       } else {
1968         usrc_val <<= shift_val;
1969       }
1970       dst.SetUint(vform, i, usrc_val);
1971     }
1972   }
1973   return dst;
1974 }
1975 
1976 
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1977 LogicVRegister Simulator::ushl(VectorFormat vform,
1978                                LogicVRegister dst,
1979                                const LogicVRegister& src1,
1980                                const LogicVRegister& src2,
1981                                bool shift_in_ls_byte) {
1982   dst.ClearForWrite(vform);
1983   int esize = LaneSizeInBitsFromFormat(vform);
1984   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1985     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1986                                                      esize,
1987                                                      shift_in_ls_byte);
1988 
1989     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1990 
1991     // Set saturation state.
1992     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1993       dst.SetUnsignedSat(i, true);
1994     }
1995 
1996     uint64_t src_val = src1.Uint(vform, i);
1997     if ((shift_val > 63) || (shift_val < -64)) {
1998       dst.SetUint(vform, i, 0);
1999     } else {
2000       if (shift_val < 0) {
2001         // Set rounding state. Rounding only needed on right shifts.
2002         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
2003           dst.SetRounding(i, true);
2004         }
2005 
2006         if (shift_val == -64) {
2007           src_val = 0;
2008         } else {
2009           src_val >>= -shift_val;
2010         }
2011       } else {
2012         src_val <<= shift_val;
2013       }
2014       dst.SetUint(vform, i, src_val);
2015     }
2016   }
2017   return dst;
2018 }
2019 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2020 LogicVRegister Simulator::sshr(VectorFormat vform,
2021                                LogicVRegister dst,
2022                                const LogicVRegister& src1,
2023                                const LogicVRegister& src2) {
2024   SimVRegister temp;
2025   // Saturate to sidestep the min-int problem.
2026   neg(vform, temp, src2).SignedSaturate(vform);
2027   sshl(vform, dst, src1, temp, false);
2028   return dst;
2029 }
2030 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2031 LogicVRegister Simulator::ushr(VectorFormat vform,
2032                                LogicVRegister dst,
2033                                const LogicVRegister& src1,
2034                                const LogicVRegister& src2) {
2035   SimVRegister temp;
2036   // Saturate to sidestep the min-int problem.
2037   neg(vform, temp, src2).SignedSaturate(vform);
2038   ushl(vform, dst, src1, temp, false);
2039   return dst;
2040 }
2041 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2042 LogicVRegister Simulator::neg(VectorFormat vform,
2043                               LogicVRegister dst,
2044                               const LogicVRegister& src) {
2045   dst.ClearForWrite(vform);
2046   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2047     // Test for signed saturation.
2048     int64_t sa = src.Int(vform, i);
2049     if (sa == MinIntFromFormat(vform)) {
2050       dst.SetSignedSat(i, true);
2051     }
2052     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2053   }
2054   return dst;
2055 }
2056 
2057 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2058 LogicVRegister Simulator::suqadd(VectorFormat vform,
2059                                  LogicVRegister dst,
2060                                  const LogicVRegister& src1,
2061                                  const LogicVRegister& src2) {
2062   dst.ClearForWrite(vform);
2063   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2064     int64_t sa = src1.IntLeftJustified(vform, i);
2065     uint64_t ub = src2.UintLeftJustified(vform, i);
2066     uint64_t ur = sa + ub;
2067 
2068     int64_t sr;
2069     memcpy(&sr, &ur, sizeof(sr));
2070     if (sr < sa) {  // Test for signed positive saturation.
2071       dst.SetInt(vform, i, MaxIntFromFormat(vform));
2072     } else {
2073       dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2074     }
2075   }
2076   return dst;
2077 }
2078 
2079 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2080 LogicVRegister Simulator::usqadd(VectorFormat vform,
2081                                  LogicVRegister dst,
2082                                  const LogicVRegister& src1,
2083                                  const LogicVRegister& src2) {
2084   dst.ClearForWrite(vform);
2085   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2086     uint64_t ua = src1.UintLeftJustified(vform, i);
2087     int64_t sb = src2.IntLeftJustified(vform, i);
2088     uint64_t ur = ua + sb;
2089 
2090     if ((sb > 0) && (ur <= ua)) {
2091       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2092     } else if ((sb < 0) && (ur >= ua)) {
2093       dst.SetUint(vform, i, 0);  // Negative saturation.
2094     } else {
2095       dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2096     }
2097   }
2098   return dst;
2099 }
2100 
2101 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2102 LogicVRegister Simulator::abs(VectorFormat vform,
2103                               LogicVRegister dst,
2104                               const LogicVRegister& src) {
2105   dst.ClearForWrite(vform);
2106   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2107     // Test for signed saturation.
2108     int64_t sa = src.Int(vform, i);
2109     if (sa == MinIntFromFormat(vform)) {
2110       dst.SetSignedSat(i, true);
2111     }
2112     if (sa < 0) {
2113       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2114     } else {
2115       dst.SetInt(vform, i, sa);
2116     }
2117   }
2118   return dst;
2119 }
2120 
2121 
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2122 LogicVRegister Simulator::andv(VectorFormat vform,
2123                                LogicVRegister dst,
2124                                const LogicPRegister& pg,
2125                                const LogicVRegister& src) {
2126   VIXL_ASSERT(IsSVEFormat(vform));
2127   uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2128   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2129     if (!pg.IsActive(vform, i)) continue;
2130 
2131     result &= src.Uint(vform, i);
2132   }
2133   VectorFormat vform_dst =
2134       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2135   dst.ClearForWrite(vform_dst);
2136   dst.SetUint(vform_dst, 0, result);
2137   return dst;
2138 }
2139 
2140 
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2141 LogicVRegister Simulator::eorv(VectorFormat vform,
2142                                LogicVRegister dst,
2143                                const LogicPRegister& pg,
2144                                const LogicVRegister& src) {
2145   VIXL_ASSERT(IsSVEFormat(vform));
2146   uint64_t result = 0;
2147   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2148     if (!pg.IsActive(vform, i)) continue;
2149 
2150     result ^= src.Uint(vform, i);
2151   }
2152   VectorFormat vform_dst =
2153       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2154   dst.ClearForWrite(vform_dst);
2155   dst.SetUint(vform_dst, 0, result);
2156   return dst;
2157 }
2158 
2159 
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2160 LogicVRegister Simulator::orv(VectorFormat vform,
2161                               LogicVRegister dst,
2162                               const LogicPRegister& pg,
2163                               const LogicVRegister& src) {
2164   VIXL_ASSERT(IsSVEFormat(vform));
2165   uint64_t result = 0;
2166   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2167     if (!pg.IsActive(vform, i)) continue;
2168 
2169     result |= src.Uint(vform, i);
2170   }
2171   VectorFormat vform_dst =
2172       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2173   dst.ClearForWrite(vform_dst);
2174   dst.SetUint(vform_dst, 0, result);
2175   return dst;
2176 }
2177 
2178 
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2179 LogicVRegister Simulator::saddv(VectorFormat vform,
2180                                 LogicVRegister dst,
2181                                 const LogicPRegister& pg,
2182                                 const LogicVRegister& src) {
2183   VIXL_ASSERT(IsSVEFormat(vform));
2184   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2185   int64_t result = 0;
2186   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2187     if (!pg.IsActive(vform, i)) continue;
2188 
2189     // The destination register always has D-lane sizes and the source register
2190     // always has S-lanes or smaller, so signed integer overflow -- undefined
2191     // behaviour -- can't occur.
2192     result += src.Int(vform, i);
2193   }
2194 
2195   dst.ClearForWrite(kFormatD);
2196   dst.SetInt(kFormatD, 0, result);
2197   return dst;
2198 }
2199 
2200 
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2201 LogicVRegister Simulator::uaddv(VectorFormat vform,
2202                                 LogicVRegister dst,
2203                                 const LogicPRegister& pg,
2204                                 const LogicVRegister& src) {
2205   VIXL_ASSERT(IsSVEFormat(vform));
2206   uint64_t result = 0;
2207   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2208     if (!pg.IsActive(vform, i)) continue;
2209 
2210     result += src.Uint(vform, i);
2211   }
2212 
2213   dst.ClearForWrite(kFormatD);
2214   dst.SetUint(kFormatD, 0, result);
2215   return dst;
2216 }
2217 
2218 
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2219 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2220                                         LogicVRegister dst,
2221                                         bool dst_is_signed,
2222                                         const LogicVRegister& src,
2223                                         bool src_is_signed) {
2224   bool upperhalf = false;
2225   VectorFormat srcform = dstform;
2226   if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2227       (dstform == kFormat4S)) {
2228     upperhalf = true;
2229     srcform = VectorFormatHalfLanes(srcform);
2230   }
2231   srcform = VectorFormatDoubleWidth(srcform);
2232 
2233   LogicVRegister src_copy = src;
2234 
2235   int offset;
2236   if (upperhalf) {
2237     offset = LaneCountFromFormat(dstform) / 2;
2238   } else {
2239     offset = 0;
2240   }
2241 
2242   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2243     int64_t ssrc = src_copy.Int(srcform, i);
2244     uint64_t usrc = src_copy.Uint(srcform, i);
2245 
2246     // Test for signed saturation
2247     if (ssrc > MaxIntFromFormat(dstform)) {
2248       dst.SetSignedSat(offset + i, true);
2249     } else if (ssrc < MinIntFromFormat(dstform)) {
2250       dst.SetSignedSat(offset + i, false);
2251     }
2252 
2253     // Test for unsigned saturation
2254     if (src_is_signed) {
2255       if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2256         dst.SetUnsignedSat(offset + i, true);
2257       } else if (ssrc < 0) {
2258         dst.SetUnsignedSat(offset + i, false);
2259       }
2260     } else {
2261       if (usrc > MaxUintFromFormat(dstform)) {
2262         dst.SetUnsignedSat(offset + i, true);
2263       }
2264     }
2265 
2266     int64_t result;
2267     if (src_is_signed) {
2268       result = ssrc & MaxUintFromFormat(dstform);
2269     } else {
2270       result = usrc & MaxUintFromFormat(dstform);
2271     }
2272 
2273     if (dst_is_signed) {
2274       dst.SetInt(dstform, offset + i, result);
2275     } else {
2276       dst.SetUint(dstform, offset + i, result);
2277     }
2278   }
2279 
2280   if (upperhalf) {
2281     // Clear any bits beyond a Q register.
2282     dst.ClearForWrite(kFormat16B);
2283   } else {
2284     dst.ClearForWrite(dstform);
2285   }
2286   return dst;
2287 }
2288 
2289 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2290 LogicVRegister Simulator::xtn(VectorFormat vform,
2291                               LogicVRegister dst,
2292                               const LogicVRegister& src) {
2293   return extractnarrow(vform, dst, true, src, true);
2294 }
2295 
2296 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2297 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2298                                 LogicVRegister dst,
2299                                 const LogicVRegister& src) {
2300   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2301 }
2302 
2303 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2304 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2305                                  LogicVRegister dst,
2306                                  const LogicVRegister& src) {
2307   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2308 }
2309 
2310 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2311 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2312                                 LogicVRegister dst,
2313                                 const LogicVRegister& src) {
2314   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2315 }
2316 
2317 
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2318 LogicVRegister Simulator::absdiff(VectorFormat vform,
2319                                   LogicVRegister dst,
2320                                   const LogicVRegister& src1,
2321                                   const LogicVRegister& src2,
2322                                   bool is_signed) {
2323   dst.ClearForWrite(vform);
2324   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2325     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2326                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
2327     // Always calculate the answer using unsigned arithmetic, to avoid
2328     // implementation-defined signed overflow.
2329     if (src1_gt_src2) {
2330       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2331     } else {
2332       dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2333     }
2334   }
2335   return dst;
2336 }
2337 
2338 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2339 LogicVRegister Simulator::saba(VectorFormat vform,
2340                                LogicVRegister dst,
2341                                const LogicVRegister& src1,
2342                                const LogicVRegister& src2) {
2343   SimVRegister temp;
2344   dst.ClearForWrite(vform);
2345   absdiff(vform, temp, src1, src2, true);
2346   add(vform, dst, dst, temp);
2347   return dst;
2348 }
2349 
2350 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2351 LogicVRegister Simulator::uaba(VectorFormat vform,
2352                                LogicVRegister dst,
2353                                const LogicVRegister& src1,
2354                                const LogicVRegister& src2) {
2355   SimVRegister temp;
2356   dst.ClearForWrite(vform);
2357   absdiff(vform, temp, src1, src2, false);
2358   add(vform, dst, dst, temp);
2359   return dst;
2360 }
2361 
2362 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2363 LogicVRegister Simulator::not_(VectorFormat vform,
2364                                LogicVRegister dst,
2365                                const LogicVRegister& src) {
2366   dst.ClearForWrite(vform);
2367   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2368     dst.SetUint(vform, i, ~src.Uint(vform, i));
2369   }
2370   return dst;
2371 }
2372 
2373 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2374 LogicVRegister Simulator::rbit(VectorFormat vform,
2375                                LogicVRegister dst,
2376                                const LogicVRegister& src) {
2377   uint64_t result[kZRegMaxSizeInBytes];
2378   int lane_count = LaneCountFromFormat(vform);
2379   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2380   uint64_t reversed_value;
2381   uint64_t value;
2382   for (int i = 0; i < lane_count; i++) {
2383     value = src.Uint(vform, i);
2384     reversed_value = 0;
2385     for (int j = 0; j < lane_size_in_bits; j++) {
2386       reversed_value = (reversed_value << 1) | (value & 1);
2387       value >>= 1;
2388     }
2389     result[i] = reversed_value;
2390   }
2391 
2392   dst.ClearForWrite(vform);
2393   for (int i = 0; i < lane_count; ++i) {
2394     dst.SetUint(vform, i, result[i]);
2395   }
2396   return dst;
2397 }
2398 
2399 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2400 LogicVRegister Simulator::rev(VectorFormat vform,
2401                               LogicVRegister dst,
2402                               const LogicVRegister& src) {
2403   VIXL_ASSERT(IsSVEFormat(vform));
2404   int lane_count = LaneCountFromFormat(vform);
2405   for (int i = 0; i < lane_count / 2; i++) {
2406     uint64_t t = src.Uint(vform, i);
2407     dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2408     dst.SetUint(vform, lane_count - i - 1, t);
2409   }
2410   return dst;
2411 }
2412 
2413 
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2414 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2415                                    LogicVRegister dst,
2416                                    const LogicVRegister& src,
2417                                    int rev_size) {
2418   uint64_t result[kZRegMaxSizeInBytes] = {};
2419   int lane_count = LaneCountFromFormat(vform);
2420   int lane_size = LaneSizeInBytesFromFormat(vform);
2421   int lanes_per_loop = rev_size / lane_size;
2422   for (int i = 0; i < lane_count; i += lanes_per_loop) {
2423     for (int j = 0; j < lanes_per_loop; j++) {
2424       result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2425     }
2426   }
2427   dst.ClearForWrite(vform);
2428   for (int i = 0; i < lane_count; ++i) {
2429     dst.SetUint(vform, i, result[i]);
2430   }
2431   return dst;
2432 }
2433 
2434 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2435 LogicVRegister Simulator::rev16(VectorFormat vform,
2436                                 LogicVRegister dst,
2437                                 const LogicVRegister& src) {
2438   return rev_byte(vform, dst, src, 2);
2439 }
2440 
2441 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2442 LogicVRegister Simulator::rev32(VectorFormat vform,
2443                                 LogicVRegister dst,
2444                                 const LogicVRegister& src) {
2445   return rev_byte(vform, dst, src, 4);
2446 }
2447 
2448 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2449 LogicVRegister Simulator::rev64(VectorFormat vform,
2450                                 LogicVRegister dst,
2451                                 const LogicVRegister& src) {
2452   return rev_byte(vform, dst, src, 8);
2453 }
2454 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2455 LogicVRegister Simulator::addlp(VectorFormat vform,
2456                                 LogicVRegister dst,
2457                                 const LogicVRegister& src,
2458                                 bool is_signed,
2459                                 bool do_accumulate) {
2460   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2461   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2462 
2463   uint64_t result[kZRegMaxSizeInBytes];
2464   int lane_count = LaneCountFromFormat(vform);
2465   for (int i = 0; i < lane_count; i++) {
2466     if (is_signed) {
2467       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2468                                         src.Int(vformsrc, 2 * i + 1));
2469     } else {
2470       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2471     }
2472   }
2473 
2474   dst.ClearForWrite(vform);
2475   for (int i = 0; i < lane_count; ++i) {
2476     if (do_accumulate) {
2477       result[i] += dst.Uint(vform, i);
2478     }
2479     dst.SetUint(vform, i, result[i]);
2480   }
2481 
2482   return dst;
2483 }
2484 
2485 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2486 LogicVRegister Simulator::saddlp(VectorFormat vform,
2487                                  LogicVRegister dst,
2488                                  const LogicVRegister& src) {
2489   return addlp(vform, dst, src, true, false);
2490 }
2491 
2492 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2493 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2494                                  LogicVRegister dst,
2495                                  const LogicVRegister& src) {
2496   return addlp(vform, dst, src, false, false);
2497 }
2498 
2499 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2500 LogicVRegister Simulator::sadalp(VectorFormat vform,
2501                                  LogicVRegister dst,
2502                                  const LogicVRegister& src) {
2503   return addlp(vform, dst, src, true, true);
2504 }
2505 
2506 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2507 LogicVRegister Simulator::uadalp(VectorFormat vform,
2508                                  LogicVRegister dst,
2509                                  const LogicVRegister& src) {
2510   return addlp(vform, dst, src, false, true);
2511 }
2512 
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2513 LogicVRegister Simulator::ror(VectorFormat vform,
2514                               LogicVRegister dst,
2515                               const LogicVRegister& src,
2516                               int rotation) {
2517   dst.ClearForWrite(vform);
2518   int width = LaneSizeInBitsFromFormat(vform);
2519   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2520     uint64_t value = src.Uint(vform, i);
2521     dst.SetUint(vform, i, RotateRight(value, rotation, width));
2522   }
2523   return dst;
2524 }
2525 
rol(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2526 LogicVRegister Simulator::rol(VectorFormat vform,
2527                               LogicVRegister dst,
2528                               const LogicVRegister& src,
2529                               int rotation) {
2530   int ror_equivalent = LaneSizeInBitsFromFormat(vform) - rotation;
2531   return ror(vform, dst, src, ror_equivalent);
2532 }
2533 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2534 LogicVRegister Simulator::ext(VectorFormat vform,
2535                               LogicVRegister dst,
2536                               const LogicVRegister& src1,
2537                               const LogicVRegister& src2,
2538                               int index) {
2539   uint8_t result[kZRegMaxSizeInBytes] = {};
2540   int lane_count = LaneCountFromFormat(vform);
2541   for (int i = 0; i < lane_count - index; ++i) {
2542     result[i] = static_cast<uint8_t>(src1.Uint(vform, i + index));
2543   }
2544   for (int i = 0; i < index; ++i) {
2545     result[lane_count - index + i] = static_cast<uint8_t>(src2.Uint(vform, i));
2546   }
2547   dst.ClearForWrite(vform);
2548   for (int i = 0; i < lane_count; ++i) {
2549     dst.SetUint(vform, i, result[i]);
2550   }
2551   return dst;
2552 }
2553 
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2554 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2555                                                 LogicVRegister dst,
2556                                                 const LogicVRegister& src,
2557                                                 int index) {
2558   if (index < 0) index += LaneCountFromFormat(vform);
2559   VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2560   index *= LaneSizeInBytesFromFormat(vform);
2561   return ext(kFormatVnB, dst, src, src, index);
2562 }
2563 
2564 
2565 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2566 LogicVRegister Simulator::fadda(VectorFormat vform,
2567                                 LogicVRegister acc,
2568                                 const LogicPRegister& pg,
2569                                 const LogicVRegister& src) {
2570   T result = acc.Float<T>(0);
2571   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2572     if (!pg.IsActive(vform, i)) continue;
2573 
2574     result = FPAdd(result, src.Float<T>(i));
2575   }
2576   VectorFormat vform_dst =
2577       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2578   acc.ClearForWrite(vform_dst);
2579   acc.SetFloat(0, result);
2580   return acc;
2581 }
2582 
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2583 LogicVRegister Simulator::fadda(VectorFormat vform,
2584                                 LogicVRegister acc,
2585                                 const LogicPRegister& pg,
2586                                 const LogicVRegister& src) {
2587   switch (LaneSizeInBitsFromFormat(vform)) {
2588     case kHRegSize:
2589       fadda<SimFloat16>(vform, acc, pg, src);
2590       break;
2591     case kSRegSize:
2592       fadda<float>(vform, acc, pg, src);
2593       break;
2594     case kDRegSize:
2595       fadda<double>(vform, acc, pg, src);
2596       break;
2597     default:
2598       VIXL_UNREACHABLE();
2599   }
2600   return acc;
2601 }
2602 
2603 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2604 LogicVRegister Simulator::fcadd(VectorFormat vform,
2605                                 LogicVRegister dst,          // d
2606                                 const LogicVRegister& src1,  // n
2607                                 const LogicVRegister& src2,  // m
2608                                 int rot) {
2609   int elements = LaneCountFromFormat(vform);
2610 
2611   T element1, element3;
2612   rot = (rot == 1) ? 270 : 90;
2613 
2614   // Loop example:
2615   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2616   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2617 
2618   for (int e = 0; e <= (elements / 2) - 1; e++) {
2619     switch (rot) {
2620       case 90:
2621         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2622         element3 = src2.Float<T>(e * 2);
2623         break;
2624       case 270:
2625         element1 = src2.Float<T>(e * 2 + 1);
2626         element3 = FPNeg(src2.Float<T>(e * 2));
2627         break;
2628       default:
2629         VIXL_UNREACHABLE();
2630         return dst;  // prevents "element(n) may be unintialized" errors
2631     }
2632     dst.ClearForWrite(vform);
2633     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2634     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2635   }
2636   return dst;
2637 }
2638 
2639 
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2640 LogicVRegister Simulator::fcadd(VectorFormat vform,
2641                                 LogicVRegister dst,          // d
2642                                 const LogicVRegister& src1,  // n
2643                                 const LogicVRegister& src2,  // m
2644                                 int rot) {
2645   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2646     fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2647   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2648     fcadd<float>(vform, dst, src1, src2, rot);
2649   } else {
2650     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2651     fcadd<double>(vform, dst, src1, src2, rot);
2652   }
2653   return dst;
2654 }
2655 
2656 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2657 LogicVRegister Simulator::fcmla(VectorFormat vform,
2658                                 LogicVRegister dst,
2659                                 const LogicVRegister& src1,
2660                                 const LogicVRegister& src2,
2661                                 const LogicVRegister& acc,
2662                                 int index,
2663                                 int rot) {
2664   int elements = LaneCountFromFormat(vform);
2665 
2666   T element1, element2, element3, element4;
2667   rot *= 90;
2668 
2669   // Loop example:
2670   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2671   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2672 
2673   for (int e = 0; e <= (elements / 2) - 1; e++) {
2674     // Index == -1 indicates a vector/vector rather than vector/indexed-element
2675     // operation.
2676     int f = (index < 0) ? e : index;
2677 
2678     switch (rot) {
2679       case 0:
2680         element1 = src2.Float<T>(f * 2);
2681         element2 = src1.Float<T>(e * 2);
2682         element3 = src2.Float<T>(f * 2 + 1);
2683         element4 = src1.Float<T>(e * 2);
2684         break;
2685       case 90:
2686         element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2687         element2 = src1.Float<T>(e * 2 + 1);
2688         element3 = src2.Float<T>(f * 2);
2689         element4 = src1.Float<T>(e * 2 + 1);
2690         break;
2691       case 180:
2692         element1 = FPNeg(src2.Float<T>(f * 2));
2693         element2 = src1.Float<T>(e * 2);
2694         element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2695         element4 = src1.Float<T>(e * 2);
2696         break;
2697       case 270:
2698         element1 = src2.Float<T>(f * 2 + 1);
2699         element2 = src1.Float<T>(e * 2 + 1);
2700         element3 = FPNeg(src2.Float<T>(f * 2));
2701         element4 = src1.Float<T>(e * 2 + 1);
2702         break;
2703       default:
2704         VIXL_UNREACHABLE();
2705         return dst;  // prevents "element(n) may be unintialized" errors
2706     }
2707     dst.ClearForWrite(vform);
2708     dst.SetFloat<T>(vform,
2709                     e * 2,
2710                     FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2711     dst.SetFloat<T>(vform,
2712                     e * 2 + 1,
2713                     FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2714   }
2715   return dst;
2716 }
2717 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2718 LogicVRegister Simulator::fcmla(VectorFormat vform,
2719                                 LogicVRegister dst,
2720                                 const LogicVRegister& src1,
2721                                 const LogicVRegister& src2,
2722                                 const LogicVRegister& acc,
2723                                 int rot) {
2724   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2725     fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2726   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2727     fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2728   } else {
2729     fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2730   }
2731   return dst;
2732 }
2733 
2734 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2735 LogicVRegister Simulator::fcmla(VectorFormat vform,
2736                                 LogicVRegister dst,          // d
2737                                 const LogicVRegister& src1,  // n
2738                                 const LogicVRegister& src2,  // m
2739                                 int index,
2740                                 int rot) {
2741   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2742     fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
2743   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2744     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2745   } else {
2746     fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2747   }
2748   return dst;
2749 }
2750 
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2751 LogicVRegister Simulator::cadd(VectorFormat vform,
2752                                LogicVRegister dst,
2753                                const LogicVRegister& src1,
2754                                const LogicVRegister& src2,
2755                                int rot,
2756                                bool saturate) {
2757   SimVRegister src1_r, src1_i;
2758   SimVRegister src2_r, src2_i;
2759   SimVRegister zero;
2760   zero.Clear();
2761   uzp1(vform, src1_r, src1, zero);
2762   uzp2(vform, src1_i, src1, zero);
2763   uzp1(vform, src2_r, src2, zero);
2764   uzp2(vform, src2_i, src2, zero);
2765 
2766   if (rot == 90) {
2767     if (saturate) {
2768       sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2769       add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2770     } else {
2771       sub(vform, src1_r, src1_r, src2_i);
2772       add(vform, src1_i, src1_i, src2_r);
2773     }
2774   } else {
2775     VIXL_ASSERT(rot == 270);
2776     if (saturate) {
2777       add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2778       sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2779     } else {
2780       add(vform, src1_r, src1_r, src2_i);
2781       sub(vform, src1_i, src1_i, src2_r);
2782     }
2783   }
2784 
2785   zip1(vform, dst, src1_r, src1_i);
2786   return dst;
2787 }
2788 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2789 LogicVRegister Simulator::cmla(VectorFormat vform,
2790                                LogicVRegister dst,
2791                                const LogicVRegister& srca,
2792                                const LogicVRegister& src1,
2793                                const LogicVRegister& src2,
2794                                int rot) {
2795   SimVRegister src1_a;
2796   SimVRegister src2_a, src2_b;
2797   SimVRegister srca_i, srca_r;
2798   SimVRegister zero, temp;
2799   zero.Clear();
2800 
2801   if ((rot == 0) || (rot == 180)) {
2802     uzp1(vform, src1_a, src1, zero);
2803     uzp1(vform, src2_a, src2, zero);
2804     uzp2(vform, src2_b, src2, zero);
2805   } else {
2806     uzp2(vform, src1_a, src1, zero);
2807     uzp2(vform, src2_a, src2, zero);
2808     uzp1(vform, src2_b, src2, zero);
2809   }
2810 
2811   uzp1(vform, srca_r, srca, zero);
2812   uzp2(vform, srca_i, srca, zero);
2813 
2814   bool sub_r = (rot == 90) || (rot == 180);
2815   bool sub_i = (rot == 180) || (rot == 270);
2816 
2817   mul(vform, temp, src1_a, src2_a);
2818   if (sub_r) {
2819     sub(vform, srca_r, srca_r, temp);
2820   } else {
2821     add(vform, srca_r, srca_r, temp);
2822   }
2823 
2824   mul(vform, temp, src1_a, src2_b);
2825   if (sub_i) {
2826     sub(vform, srca_i, srca_i, temp);
2827   } else {
2828     add(vform, srca_i, srca_i, temp);
2829   }
2830 
2831   zip1(vform, dst, srca_r, srca_i);
2832   return dst;
2833 }
2834 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2835 LogicVRegister Simulator::cmla(VectorFormat vform,
2836                                LogicVRegister dst,
2837                                const LogicVRegister& srca,
2838                                const LogicVRegister& src1,
2839                                const LogicVRegister& src2,
2840                                int index,
2841                                int rot) {
2842   SimVRegister temp;
2843   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2844   return cmla(vform, dst, srca, src1, temp, rot);
2845 }
2846 
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2847 LogicVRegister Simulator::bgrp(VectorFormat vform,
2848                                LogicVRegister dst,
2849                                const LogicVRegister& src1,
2850                                const LogicVRegister& src2,
2851                                bool do_bext) {
2852   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2853     uint64_t value = src1.Uint(vform, i);
2854     uint64_t mask = src2.Uint(vform, i);
2855     int high_pos = 0;
2856     int low_pos = 0;
2857     uint64_t result_high = 0;
2858     uint64_t result_low = 0;
2859     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2860       if ((mask & 1) == 0) {
2861         result_high |= (value & 1) << high_pos;
2862         high_pos++;
2863       } else {
2864         result_low |= (value & 1) << low_pos;
2865         low_pos++;
2866       }
2867       mask >>= 1;
2868       value >>= 1;
2869     }
2870 
2871     if (!do_bext) {
2872       result_low |= result_high << low_pos;
2873     }
2874 
2875     dst.SetUint(vform, i, result_low);
2876   }
2877   return dst;
2878 }
2879 
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2880 LogicVRegister Simulator::bdep(VectorFormat vform,
2881                                LogicVRegister dst,
2882                                const LogicVRegister& src1,
2883                                const LogicVRegister& src2) {
2884   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2885     uint64_t value = src1.Uint(vform, i);
2886     uint64_t mask = src2.Uint(vform, i);
2887     uint64_t result = 0;
2888     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2889       if ((mask & 1) == 1) {
2890         result |= (value & 1) << j;
2891         value >>= 1;
2892       }
2893       mask >>= 1;
2894     }
2895     dst.SetUint(vform, i, result);
2896   }
2897   return dst;
2898 }
2899 
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2900 LogicVRegister Simulator::histogram(VectorFormat vform,
2901                                     LogicVRegister dst,
2902                                     const LogicPRegister& pg,
2903                                     const LogicVRegister& src1,
2904                                     const LogicVRegister& src2,
2905                                     bool do_segmented) {
2906   int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2907   uint64_t result[kZRegMaxSizeInBytes];
2908 
2909   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2910     uint64_t count = 0;
2911     uint64_t value = src1.Uint(vform, i);
2912 
2913     int segment = do_segmented ? (i / elements_per_segment) : 0;
2914     int segment_offset = segment * elements_per_segment;
2915     int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2916     for (int j = 0; j < hist_limit; j++) {
2917       if (pg.IsActive(vform, j) &&
2918           (value == src2.Uint(vform, j + segment_offset))) {
2919         count++;
2920       }
2921     }
2922     result[i] = count;
2923   }
2924   dst.SetUintArray(vform, result);
2925   return dst;
2926 }
2927 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2928 LogicVRegister Simulator::dup_element(VectorFormat vform,
2929                                       LogicVRegister dst,
2930                                       const LogicVRegister& src,
2931                                       int src_index) {
2932   if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2933     // When duplicating an element larger than 64 bits, split the element into
2934     // 64-bit parts, and duplicate the parts across the destination.
2935     uint64_t d[4];
2936     int count = (vform == kFormatVnQ) ? 2 : 4;
2937     for (int i = 0; i < count; i++) {
2938       d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2939     }
2940     dst.Clear();
2941     for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2942       dst.SetUint(kFormatVnD, i, d[i % count]);
2943     }
2944   } else {
2945     int lane_count = LaneCountFromFormat(vform);
2946     uint64_t value = src.Uint(vform, src_index);
2947     dst.ClearForWrite(vform);
2948     for (int i = 0; i < lane_count; ++i) {
2949       dst.SetUint(vform, i, value);
2950     }
2951   }
2952   return dst;
2953 }
2954 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2955 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2956                                                    LogicVRegister dst,
2957                                                    const LogicVRegister& src,
2958                                                    int src_index) {
2959   // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2960   // whereas in NEON, the size of segment is equal to the size of register
2961   // itself.
2962   int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2963   VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2964   int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2965 
2966   VIXL_ASSERT(src_index >= 0);
2967   VIXL_ASSERT(src_index < lanes_per_segment);
2968 
2969   dst.ClearForWrite(vform);
2970   for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2971     uint64_t value = src.Uint(vform, j + src_index);
2972     for (int i = 0; i < lanes_per_segment; i++) {
2973       dst.SetUint(vform, j + i, value);
2974     }
2975   }
2976   return dst;
2977 }
2978 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2979 LogicVRegister Simulator::dup_elements_to_segments(
2980     VectorFormat vform,
2981     LogicVRegister dst,
2982     const std::pair<int, int>& src_and_index) {
2983   return dup_elements_to_segments(vform,
2984                                   dst,
2985                                   ReadVRegister(src_and_index.first),
2986                                   src_and_index.second);
2987 }
2988 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2989 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2990                                         LogicVRegister dst,
2991                                         uint64_t imm) {
2992   int lane_count = LaneCountFromFormat(vform);
2993   uint64_t value = imm & MaxUintFromFormat(vform);
2994   dst.ClearForWrite(vform);
2995   for (int i = 0; i < lane_count; ++i) {
2996     dst.SetUint(vform, i, value);
2997   }
2998   return dst;
2999 }
3000 
3001 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)3002 LogicVRegister Simulator::ins_element(VectorFormat vform,
3003                                       LogicVRegister dst,
3004                                       int dst_index,
3005                                       const LogicVRegister& src,
3006                                       int src_index) {
3007   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
3008   return dst;
3009 }
3010 
3011 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)3012 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
3013                                         LogicVRegister dst,
3014                                         int dst_index,
3015                                         uint64_t imm) {
3016   uint64_t value = imm & MaxUintFromFormat(vform);
3017   dst.SetUint(vform, dst_index, value);
3018   return dst;
3019 }
3020 
3021 
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)3022 LogicVRegister Simulator::index(VectorFormat vform,
3023                                 LogicVRegister dst,
3024                                 uint64_t start,
3025                                 uint64_t step) {
3026   VIXL_ASSERT(IsSVEFormat(vform));
3027   uint64_t value = start;
3028   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3029     dst.SetUint(vform, i, value);
3030     value += step;
3031   }
3032   return dst;
3033 }
3034 
3035 
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)3036 LogicVRegister Simulator::insr(VectorFormat vform,
3037                                LogicVRegister dst,
3038                                uint64_t imm) {
3039   VIXL_ASSERT(IsSVEFormat(vform));
3040   for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
3041     dst.SetUint(vform, i, dst.Uint(vform, i - 1));
3042   }
3043   dst.SetUint(vform, 0, imm);
3044   return dst;
3045 }
3046 
3047 
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3048 LogicVRegister Simulator::mov(VectorFormat vform,
3049                               LogicVRegister dst,
3050                               const LogicVRegister& src) {
3051   dst.ClearForWrite(vform);
3052   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3053     dst.SetUint(vform, lane, src.Uint(vform, lane));
3054   }
3055   return dst;
3056 }
3057 
3058 
mov(LogicPRegister dst,const LogicPRegister & src)3059 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3060   // Avoid a copy if the registers already alias.
3061   if (dst.Aliases(src)) return dst;
3062 
3063   for (int i = 0; i < dst.GetChunkCount(); i++) {
3064     dst.SetChunk(i, src.GetChunk(i));
3065   }
3066   return dst;
3067 }
3068 
3069 
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3070 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3071                                       LogicVRegister dst,
3072                                       const SimPRegister& pg,
3073                                       const LogicVRegister& src) {
3074   return sel(vform, dst, pg, src, dst);
3075 }
3076 
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3077 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3078                                       LogicVRegister dst,
3079                                       const SimPRegister& pg,
3080                                       const LogicVRegister& src) {
3081   SimVRegister zero;
3082   dup_immediate(vform, zero, 0);
3083   return sel(vform, dst, pg, src, zero);
3084 }
3085 
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3086 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3087                                           LogicVRegister dst,
3088                                           const LogicVRegister& src,
3089                                           int start_at) {
3090   VIXL_ASSERT((start_at == 0) || (start_at == 1));
3091   for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3092     dst.SetUint(vform, i, src.Uint(vform, i));
3093   }
3094   return dst;
3095 }
3096 
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3097 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3098                                       const LogicPRegister& pg,
3099                                       const LogicPRegister& src) {
3100   return sel(dst, pg, src, dst);
3101 }
3102 
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3103 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3104                                       const LogicPRegister& pg,
3105                                       const LogicPRegister& src) {
3106   SimPRegister all_false;
3107   return sel(dst, pg, src, pfalse(all_false));
3108 }
3109 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3110 LogicVRegister Simulator::movi(VectorFormat vform,
3111                                LogicVRegister dst,
3112                                uint64_t imm) {
3113   int lane_count = LaneCountFromFormat(vform);
3114   dst.ClearForWrite(vform);
3115   for (int i = 0; i < lane_count; ++i) {
3116     dst.SetUint(vform, i, imm);
3117   }
3118   return dst;
3119 }
3120 
3121 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3122 LogicVRegister Simulator::mvni(VectorFormat vform,
3123                                LogicVRegister dst,
3124                                uint64_t imm) {
3125   int lane_count = LaneCountFromFormat(vform);
3126   dst.ClearForWrite(vform);
3127   for (int i = 0; i < lane_count; ++i) {
3128     dst.SetUint(vform, i, ~imm);
3129   }
3130   return dst;
3131 }
3132 
3133 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3134 LogicVRegister Simulator::orr(VectorFormat vform,
3135                               LogicVRegister dst,
3136                               const LogicVRegister& src,
3137                               uint64_t imm) {
3138   uint64_t result[16];
3139   int lane_count = LaneCountFromFormat(vform);
3140   for (int i = 0; i < lane_count; ++i) {
3141     result[i] = src.Uint(vform, i) | imm;
3142   }
3143   dst.ClearForWrite(vform);
3144   for (int i = 0; i < lane_count; ++i) {
3145     dst.SetUint(vform, i, result[i]);
3146   }
3147   return dst;
3148 }
3149 
3150 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3151 LogicVRegister Simulator::uxtl(VectorFormat vform,
3152                                LogicVRegister dst,
3153                                const LogicVRegister& src,
3154                                bool is_2) {
3155   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3156   int lane_count = LaneCountFromFormat(vform);
3157   int src_offset = is_2 ? lane_count : 0;
3158 
3159   dst.ClearForWrite(vform);
3160   for (int i = 0; i < lane_count; i++) {
3161     dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3162   }
3163   return dst;
3164 }
3165 
3166 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3167 LogicVRegister Simulator::sxtl(VectorFormat vform,
3168                                LogicVRegister dst,
3169                                const LogicVRegister& src,
3170                                bool is_2) {
3171   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3172   int lane_count = LaneCountFromFormat(vform);
3173   int src_offset = is_2 ? lane_count : 0;
3174 
3175   dst.ClearForWrite(vform);
3176   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3177     dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3178   }
3179   return dst;
3180 }
3181 
3182 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3183 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3184                                 LogicVRegister dst,
3185                                 const LogicVRegister& src) {
3186   return uxtl(vform, dst, src, /* is_2 = */ true);
3187 }
3188 
3189 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3190 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3191                                 LogicVRegister dst,
3192                                 const LogicVRegister& src) {
3193   return sxtl(vform, dst, src, /* is_2 = */ true);
3194 }
3195 
3196 
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3197 LogicVRegister Simulator::uxt(VectorFormat vform,
3198                               LogicVRegister dst,
3199                               const LogicVRegister& src,
3200                               unsigned from_size_in_bits) {
3201   int lane_count = LaneCountFromFormat(vform);
3202   uint64_t mask = GetUintMask(from_size_in_bits);
3203 
3204   dst.ClearForWrite(vform);
3205   for (int i = 0; i < lane_count; i++) {
3206     dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3207   }
3208   return dst;
3209 }
3210 
3211 
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3212 LogicVRegister Simulator::sxt(VectorFormat vform,
3213                               LogicVRegister dst,
3214                               const LogicVRegister& src,
3215                               unsigned from_size_in_bits) {
3216   int lane_count = LaneCountFromFormat(vform);
3217 
3218   dst.ClearForWrite(vform);
3219   for (int i = 0; i < lane_count; i++) {
3220     uint64_t value =
3221         ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3222     dst.SetInt(vform, i, value);
3223   }
3224   return dst;
3225 }
3226 
3227 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3228 LogicVRegister Simulator::shrn(VectorFormat vform,
3229                                LogicVRegister dst,
3230                                const LogicVRegister& src,
3231                                int shift) {
3232   SimVRegister temp;
3233   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3234   VectorFormat vform_dst = vform;
3235   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3236   return extractnarrow(vform_dst, dst, false, shifted_src, false);
3237 }
3238 
3239 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3240 LogicVRegister Simulator::shrn2(VectorFormat vform,
3241                                 LogicVRegister dst,
3242                                 const LogicVRegister& src,
3243                                 int shift) {
3244   SimVRegister temp;
3245   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3246   VectorFormat vformdst = vform;
3247   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3248   return extractnarrow(vformdst, dst, false, shifted_src, false);
3249 }
3250 
3251 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3252 LogicVRegister Simulator::rshrn(VectorFormat vform,
3253                                 LogicVRegister dst,
3254                                 const LogicVRegister& src,
3255                                 int shift) {
3256   SimVRegister temp;
3257   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3258   VectorFormat vformdst = vform;
3259   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3260   return extractnarrow(vformdst, dst, false, shifted_src, false);
3261 }
3262 
3263 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3264 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3265                                  LogicVRegister dst,
3266                                  const LogicVRegister& src,
3267                                  int shift) {
3268   SimVRegister temp;
3269   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3270   VectorFormat vformdst = vform;
3271   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3272   return extractnarrow(vformdst, dst, false, shifted_src, false);
3273 }
3274 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3275 LogicVRegister Simulator::Table(VectorFormat vform,
3276                                 LogicVRegister dst,
3277                                 const LogicVRegister& ind,
3278                                 bool zero_out_of_bounds,
3279                                 const LogicVRegister* tab1,
3280                                 const LogicVRegister* tab2,
3281                                 const LogicVRegister* tab3,
3282                                 const LogicVRegister* tab4) {
3283   VIXL_ASSERT(tab1 != NULL);
3284   int lane_count = LaneCountFromFormat(vform);
3285   VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3286   uint64_t table[kZRegMaxSizeInBytes * 2];
3287   uint64_t result[kZRegMaxSizeInBytes];
3288 
3289   // For Neon, the table source registers are always 16B, and Neon allows only
3290   // 8B or 16B vform for the destination, so infer the table format from the
3291   // destination.
3292   VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3293 
3294   uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3295   if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3296   if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3297   if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3298 
3299   for (int i = 0; i < lane_count; i++) {
3300     uint64_t index = ind.Uint(vform, i);
3301     result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3302     if (index < tab_size) result[i] = table[index];
3303   }
3304   dst.SetUintArray(vform, result);
3305   return dst;
3306 }
3307 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3308 LogicVRegister Simulator::tbl(VectorFormat vform,
3309                               LogicVRegister dst,
3310                               const LogicVRegister& tab,
3311                               const LogicVRegister& ind) {
3312   return Table(vform, dst, ind, true, &tab);
3313 }
3314 
3315 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3316 LogicVRegister Simulator::tbl(VectorFormat vform,
3317                               LogicVRegister dst,
3318                               const LogicVRegister& tab,
3319                               const LogicVRegister& tab2,
3320                               const LogicVRegister& ind) {
3321   return Table(vform, dst, ind, true, &tab, &tab2);
3322 }
3323 
3324 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3325 LogicVRegister Simulator::tbl(VectorFormat vform,
3326                               LogicVRegister dst,
3327                               const LogicVRegister& tab,
3328                               const LogicVRegister& tab2,
3329                               const LogicVRegister& tab3,
3330                               const LogicVRegister& ind) {
3331   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3332 }
3333 
3334 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3335 LogicVRegister Simulator::tbl(VectorFormat vform,
3336                               LogicVRegister dst,
3337                               const LogicVRegister& tab,
3338                               const LogicVRegister& tab2,
3339                               const LogicVRegister& tab3,
3340                               const LogicVRegister& tab4,
3341                               const LogicVRegister& ind) {
3342   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3343 }
3344 
3345 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3346 LogicVRegister Simulator::tbx(VectorFormat vform,
3347                               LogicVRegister dst,
3348                               const LogicVRegister& tab,
3349                               const LogicVRegister& ind) {
3350   return Table(vform, dst, ind, false, &tab);
3351 }
3352 
3353 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3354 LogicVRegister Simulator::tbx(VectorFormat vform,
3355                               LogicVRegister dst,
3356                               const LogicVRegister& tab,
3357                               const LogicVRegister& tab2,
3358                               const LogicVRegister& ind) {
3359   return Table(vform, dst, ind, false, &tab, &tab2);
3360 }
3361 
3362 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3363 LogicVRegister Simulator::tbx(VectorFormat vform,
3364                               LogicVRegister dst,
3365                               const LogicVRegister& tab,
3366                               const LogicVRegister& tab2,
3367                               const LogicVRegister& tab3,
3368                               const LogicVRegister& ind) {
3369   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3370 }
3371 
3372 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3373 LogicVRegister Simulator::tbx(VectorFormat vform,
3374                               LogicVRegister dst,
3375                               const LogicVRegister& tab,
3376                               const LogicVRegister& tab2,
3377                               const LogicVRegister& tab3,
3378                               const LogicVRegister& tab4,
3379                               const LogicVRegister& ind) {
3380   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3381 }
3382 
3383 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3384 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3385                                  LogicVRegister dst,
3386                                  const LogicVRegister& src,
3387                                  int shift) {
3388   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3389 }
3390 
3391 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3392 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3393                                   LogicVRegister dst,
3394                                   const LogicVRegister& src,
3395                                   int shift) {
3396   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3397 }
3398 
3399 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3400 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3401                                   LogicVRegister dst,
3402                                   const LogicVRegister& src,
3403                                   int shift) {
3404   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3405 }
3406 
3407 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3408 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3409                                    LogicVRegister dst,
3410                                    const LogicVRegister& src,
3411                                    int shift) {
3412   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3413 }
3414 
3415 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3416 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3417                                  LogicVRegister dst,
3418                                  const LogicVRegister& src,
3419                                  int shift) {
3420   SimVRegister temp;
3421   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3422   VectorFormat vformdst = vform;
3423   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3424   return sqxtn(vformdst, dst, shifted_src);
3425 }
3426 
3427 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3428 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3429                                   LogicVRegister dst,
3430                                   const LogicVRegister& src,
3431                                   int shift) {
3432   SimVRegister temp;
3433   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3434   VectorFormat vformdst = vform;
3435   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3436   return sqxtn(vformdst, dst, shifted_src);
3437 }
3438 
3439 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3440 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3441                                   LogicVRegister dst,
3442                                   const LogicVRegister& src,
3443                                   int shift) {
3444   SimVRegister temp;
3445   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3446   VectorFormat vformdst = vform;
3447   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3448   return sqxtn(vformdst, dst, shifted_src);
3449 }
3450 
3451 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3452 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3453                                    LogicVRegister dst,
3454                                    const LogicVRegister& src,
3455                                    int shift) {
3456   SimVRegister temp;
3457   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3458   VectorFormat vformdst = vform;
3459   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3460   return sqxtn(vformdst, dst, shifted_src);
3461 }
3462 
3463 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3464 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3465                                   LogicVRegister dst,
3466                                   const LogicVRegister& src,
3467                                   int shift) {
3468   SimVRegister temp;
3469   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3470   VectorFormat vformdst = vform;
3471   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3472   return sqxtun(vformdst, dst, shifted_src);
3473 }
3474 
3475 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3476 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3477                                    LogicVRegister dst,
3478                                    const LogicVRegister& src,
3479                                    int shift) {
3480   SimVRegister temp;
3481   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3482   VectorFormat vformdst = vform;
3483   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3484   return sqxtun(vformdst, dst, shifted_src);
3485 }
3486 
3487 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3488 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3489                                    LogicVRegister dst,
3490                                    const LogicVRegister& src,
3491                                    int shift) {
3492   SimVRegister temp;
3493   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3494   VectorFormat vformdst = vform;
3495   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3496   return sqxtun(vformdst, dst, shifted_src);
3497 }
3498 
3499 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3500 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3501                                     LogicVRegister dst,
3502                                     const LogicVRegister& src,
3503                                     int shift) {
3504   SimVRegister temp;
3505   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3506   VectorFormat vformdst = vform;
3507   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3508   return sqxtun(vformdst, dst, shifted_src);
3509 }
3510 
3511 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3512 LogicVRegister Simulator::uaddl(VectorFormat vform,
3513                                 LogicVRegister dst,
3514                                 const LogicVRegister& src1,
3515                                 const LogicVRegister& src2) {
3516   SimVRegister temp1, temp2;
3517   uxtl(vform, temp1, src1);
3518   uxtl(vform, temp2, src2);
3519   add(vform, dst, temp1, temp2);
3520   return dst;
3521 }
3522 
3523 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3524 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3525                                  LogicVRegister dst,
3526                                  const LogicVRegister& src1,
3527                                  const LogicVRegister& src2) {
3528   SimVRegister temp1, temp2;
3529   uxtl2(vform, temp1, src1);
3530   uxtl2(vform, temp2, src2);
3531   add(vform, dst, temp1, temp2);
3532   return dst;
3533 }
3534 
3535 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3536 LogicVRegister Simulator::uaddw(VectorFormat vform,
3537                                 LogicVRegister dst,
3538                                 const LogicVRegister& src1,
3539                                 const LogicVRegister& src2) {
3540   SimVRegister temp;
3541   uxtl(vform, temp, src2);
3542   add(vform, dst, src1, temp);
3543   return dst;
3544 }
3545 
3546 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3547 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3548                                  LogicVRegister dst,
3549                                  const LogicVRegister& src1,
3550                                  const LogicVRegister& src2) {
3551   SimVRegister temp;
3552   uxtl2(vform, temp, src2);
3553   add(vform, dst, src1, temp);
3554   return dst;
3555 }
3556 
3557 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3558 LogicVRegister Simulator::saddl(VectorFormat vform,
3559                                 LogicVRegister dst,
3560                                 const LogicVRegister& src1,
3561                                 const LogicVRegister& src2) {
3562   SimVRegister temp1, temp2;
3563   sxtl(vform, temp1, src1);
3564   sxtl(vform, temp2, src2);
3565   add(vform, dst, temp1, temp2);
3566   return dst;
3567 }
3568 
3569 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3570 LogicVRegister Simulator::saddl2(VectorFormat vform,
3571                                  LogicVRegister dst,
3572                                  const LogicVRegister& src1,
3573                                  const LogicVRegister& src2) {
3574   SimVRegister temp1, temp2;
3575   sxtl2(vform, temp1, src1);
3576   sxtl2(vform, temp2, src2);
3577   add(vform, dst, temp1, temp2);
3578   return dst;
3579 }
3580 
3581 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3582 LogicVRegister Simulator::saddw(VectorFormat vform,
3583                                 LogicVRegister dst,
3584                                 const LogicVRegister& src1,
3585                                 const LogicVRegister& src2) {
3586   SimVRegister temp;
3587   sxtl(vform, temp, src2);
3588   add(vform, dst, src1, temp);
3589   return dst;
3590 }
3591 
3592 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3593 LogicVRegister Simulator::saddw2(VectorFormat vform,
3594                                  LogicVRegister dst,
3595                                  const LogicVRegister& src1,
3596                                  const LogicVRegister& src2) {
3597   SimVRegister temp;
3598   sxtl2(vform, temp, src2);
3599   add(vform, dst, src1, temp);
3600   return dst;
3601 }
3602 
3603 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3604 LogicVRegister Simulator::usubl(VectorFormat vform,
3605                                 LogicVRegister dst,
3606                                 const LogicVRegister& src1,
3607                                 const LogicVRegister& src2) {
3608   SimVRegister temp1, temp2;
3609   uxtl(vform, temp1, src1);
3610   uxtl(vform, temp2, src2);
3611   sub(vform, dst, temp1, temp2);
3612   return dst;
3613 }
3614 
3615 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3616 LogicVRegister Simulator::usubl2(VectorFormat vform,
3617                                  LogicVRegister dst,
3618                                  const LogicVRegister& src1,
3619                                  const LogicVRegister& src2) {
3620   SimVRegister temp1, temp2;
3621   uxtl2(vform, temp1, src1);
3622   uxtl2(vform, temp2, src2);
3623   sub(vform, dst, temp1, temp2);
3624   return dst;
3625 }
3626 
3627 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3628 LogicVRegister Simulator::usubw(VectorFormat vform,
3629                                 LogicVRegister dst,
3630                                 const LogicVRegister& src1,
3631                                 const LogicVRegister& src2) {
3632   SimVRegister temp;
3633   uxtl(vform, temp, src2);
3634   sub(vform, dst, src1, temp);
3635   return dst;
3636 }
3637 
3638 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3639 LogicVRegister Simulator::usubw2(VectorFormat vform,
3640                                  LogicVRegister dst,
3641                                  const LogicVRegister& src1,
3642                                  const LogicVRegister& src2) {
3643   SimVRegister temp;
3644   uxtl2(vform, temp, src2);
3645   sub(vform, dst, src1, temp);
3646   return dst;
3647 }
3648 
3649 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3650 LogicVRegister Simulator::ssubl(VectorFormat vform,
3651                                 LogicVRegister dst,
3652                                 const LogicVRegister& src1,
3653                                 const LogicVRegister& src2) {
3654   SimVRegister temp1, temp2;
3655   sxtl(vform, temp1, src1);
3656   sxtl(vform, temp2, src2);
3657   sub(vform, dst, temp1, temp2);
3658   return dst;
3659 }
3660 
3661 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3662 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3663                                  LogicVRegister dst,
3664                                  const LogicVRegister& src1,
3665                                  const LogicVRegister& src2) {
3666   SimVRegister temp1, temp2;
3667   sxtl2(vform, temp1, src1);
3668   sxtl2(vform, temp2, src2);
3669   sub(vform, dst, temp1, temp2);
3670   return dst;
3671 }
3672 
3673 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3674 LogicVRegister Simulator::ssubw(VectorFormat vform,
3675                                 LogicVRegister dst,
3676                                 const LogicVRegister& src1,
3677                                 const LogicVRegister& src2) {
3678   SimVRegister temp;
3679   sxtl(vform, temp, src2);
3680   sub(vform, dst, src1, temp);
3681   return dst;
3682 }
3683 
3684 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3685 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3686                                  LogicVRegister dst,
3687                                  const LogicVRegister& src1,
3688                                  const LogicVRegister& src2) {
3689   SimVRegister temp;
3690   sxtl2(vform, temp, src2);
3691   sub(vform, dst, src1, temp);
3692   return dst;
3693 }
3694 
3695 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3696 LogicVRegister Simulator::uabal(VectorFormat vform,
3697                                 LogicVRegister dst,
3698                                 const LogicVRegister& src1,
3699                                 const LogicVRegister& src2) {
3700   SimVRegister temp1, temp2;
3701   uxtl(vform, temp1, src1);
3702   uxtl(vform, temp2, src2);
3703   uaba(vform, dst, temp1, temp2);
3704   return dst;
3705 }
3706 
3707 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3708 LogicVRegister Simulator::uabal2(VectorFormat vform,
3709                                  LogicVRegister dst,
3710                                  const LogicVRegister& src1,
3711                                  const LogicVRegister& src2) {
3712   SimVRegister temp1, temp2;
3713   uxtl2(vform, temp1, src1);
3714   uxtl2(vform, temp2, src2);
3715   uaba(vform, dst, temp1, temp2);
3716   return dst;
3717 }
3718 
3719 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3720 LogicVRegister Simulator::sabal(VectorFormat vform,
3721                                 LogicVRegister dst,
3722                                 const LogicVRegister& src1,
3723                                 const LogicVRegister& src2) {
3724   SimVRegister temp1, temp2;
3725   sxtl(vform, temp1, src1);
3726   sxtl(vform, temp2, src2);
3727   saba(vform, dst, temp1, temp2);
3728   return dst;
3729 }
3730 
3731 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3732 LogicVRegister Simulator::sabal2(VectorFormat vform,
3733                                  LogicVRegister dst,
3734                                  const LogicVRegister& src1,
3735                                  const LogicVRegister& src2) {
3736   SimVRegister temp1, temp2;
3737   sxtl2(vform, temp1, src1);
3738   sxtl2(vform, temp2, src2);
3739   saba(vform, dst, temp1, temp2);
3740   return dst;
3741 }
3742 
3743 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3744 LogicVRegister Simulator::uabdl(VectorFormat vform,
3745                                 LogicVRegister dst,
3746                                 const LogicVRegister& src1,
3747                                 const LogicVRegister& src2) {
3748   SimVRegister temp1, temp2;
3749   uxtl(vform, temp1, src1);
3750   uxtl(vform, temp2, src2);
3751   absdiff(vform, dst, temp1, temp2, false);
3752   return dst;
3753 }
3754 
3755 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3756 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3757                                  LogicVRegister dst,
3758                                  const LogicVRegister& src1,
3759                                  const LogicVRegister& src2) {
3760   SimVRegister temp1, temp2;
3761   uxtl2(vform, temp1, src1);
3762   uxtl2(vform, temp2, src2);
3763   absdiff(vform, dst, temp1, temp2, false);
3764   return dst;
3765 }
3766 
3767 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3768 LogicVRegister Simulator::sabdl(VectorFormat vform,
3769                                 LogicVRegister dst,
3770                                 const LogicVRegister& src1,
3771                                 const LogicVRegister& src2) {
3772   SimVRegister temp1, temp2;
3773   sxtl(vform, temp1, src1);
3774   sxtl(vform, temp2, src2);
3775   absdiff(vform, dst, temp1, temp2, true);
3776   return dst;
3777 }
3778 
3779 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3780 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3781                                  LogicVRegister dst,
3782                                  const LogicVRegister& src1,
3783                                  const LogicVRegister& src2) {
3784   SimVRegister temp1, temp2;
3785   sxtl2(vform, temp1, src1);
3786   sxtl2(vform, temp2, src2);
3787   absdiff(vform, dst, temp1, temp2, true);
3788   return dst;
3789 }
3790 
3791 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3792 LogicVRegister Simulator::umull(VectorFormat vform,
3793                                 LogicVRegister dst,
3794                                 const LogicVRegister& src1,
3795                                 const LogicVRegister& src2,
3796                                 bool is_2) {
3797   SimVRegister temp1, temp2;
3798   uxtl(vform, temp1, src1, is_2);
3799   uxtl(vform, temp2, src2, is_2);
3800   mul(vform, dst, temp1, temp2);
3801   return dst;
3802 }
3803 
3804 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3805 LogicVRegister Simulator::umull2(VectorFormat vform,
3806                                  LogicVRegister dst,
3807                                  const LogicVRegister& src1,
3808                                  const LogicVRegister& src2) {
3809   return umull(vform, dst, src1, src2, /* is_2 = */ true);
3810 }
3811 
3812 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3813 LogicVRegister Simulator::smull(VectorFormat vform,
3814                                 LogicVRegister dst,
3815                                 const LogicVRegister& src1,
3816                                 const LogicVRegister& src2,
3817                                 bool is_2) {
3818   SimVRegister temp1, temp2;
3819   sxtl(vform, temp1, src1, is_2);
3820   sxtl(vform, temp2, src2, is_2);
3821   mul(vform, dst, temp1, temp2);
3822   return dst;
3823 }
3824 
3825 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3826 LogicVRegister Simulator::smull2(VectorFormat vform,
3827                                  LogicVRegister dst,
3828                                  const LogicVRegister& src1,
3829                                  const LogicVRegister& src2) {
3830   return smull(vform, dst, src1, src2, /* is_2 = */ true);
3831 }
3832 
3833 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3834 LogicVRegister Simulator::umlsl(VectorFormat vform,
3835                                 LogicVRegister dst,
3836                                 const LogicVRegister& src1,
3837                                 const LogicVRegister& src2,
3838                                 bool is_2) {
3839   SimVRegister temp1, temp2;
3840   uxtl(vform, temp1, src1, is_2);
3841   uxtl(vform, temp2, src2, is_2);
3842   mls(vform, dst, dst, temp1, temp2);
3843   return dst;
3844 }
3845 
3846 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3847 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3848                                  LogicVRegister dst,
3849                                  const LogicVRegister& src1,
3850                                  const LogicVRegister& src2) {
3851   return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3852 }
3853 
3854 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3855 LogicVRegister Simulator::smlsl(VectorFormat vform,
3856                                 LogicVRegister dst,
3857                                 const LogicVRegister& src1,
3858                                 const LogicVRegister& src2,
3859                                 bool is_2) {
3860   SimVRegister temp1, temp2;
3861   sxtl(vform, temp1, src1, is_2);
3862   sxtl(vform, temp2, src2, is_2);
3863   mls(vform, dst, dst, temp1, temp2);
3864   return dst;
3865 }
3866 
3867 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3868 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3869                                  LogicVRegister dst,
3870                                  const LogicVRegister& src1,
3871                                  const LogicVRegister& src2) {
3872   return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3873 }
3874 
3875 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3876 LogicVRegister Simulator::umlal(VectorFormat vform,
3877                                 LogicVRegister dst,
3878                                 const LogicVRegister& src1,
3879                                 const LogicVRegister& src2,
3880                                 bool is_2) {
3881   SimVRegister temp1, temp2;
3882   uxtl(vform, temp1, src1, is_2);
3883   uxtl(vform, temp2, src2, is_2);
3884   mla(vform, dst, dst, temp1, temp2);
3885   return dst;
3886 }
3887 
3888 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3889 LogicVRegister Simulator::umlal2(VectorFormat vform,
3890                                  LogicVRegister dst,
3891                                  const LogicVRegister& src1,
3892                                  const LogicVRegister& src2) {
3893   return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3894 }
3895 
3896 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3897 LogicVRegister Simulator::smlal(VectorFormat vform,
3898                                 LogicVRegister dst,
3899                                 const LogicVRegister& src1,
3900                                 const LogicVRegister& src2,
3901                                 bool is_2) {
3902   SimVRegister temp1, temp2;
3903   sxtl(vform, temp1, src1, is_2);
3904   sxtl(vform, temp2, src2, is_2);
3905   mla(vform, dst, dst, temp1, temp2);
3906   return dst;
3907 }
3908 
3909 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3910 LogicVRegister Simulator::smlal2(VectorFormat vform,
3911                                  LogicVRegister dst,
3912                                  const LogicVRegister& src1,
3913                                  const LogicVRegister& src2) {
3914   return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3915 }
3916 
3917 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3918 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3919                                   LogicVRegister dst,
3920                                   const LogicVRegister& src1,
3921                                   const LogicVRegister& src2,
3922                                   bool is_2) {
3923   SimVRegister temp;
3924   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3925   return add(vform, dst, dst, product).SignedSaturate(vform);
3926 }
3927 
3928 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3929 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3930                                    LogicVRegister dst,
3931                                    const LogicVRegister& src1,
3932                                    const LogicVRegister& src2) {
3933   return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3934 }
3935 
3936 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3937 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3938                                   LogicVRegister dst,
3939                                   const LogicVRegister& src1,
3940                                   const LogicVRegister& src2,
3941                                   bool is_2) {
3942   SimVRegister temp;
3943   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3944   return sub(vform, dst, dst, product).SignedSaturate(vform);
3945 }
3946 
3947 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3948 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3949                                    LogicVRegister dst,
3950                                    const LogicVRegister& src1,
3951                                    const LogicVRegister& src2) {
3952   return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3953 }
3954 
3955 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3956 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3957                                   LogicVRegister dst,
3958                                   const LogicVRegister& src1,
3959                                   const LogicVRegister& src2,
3960                                   bool is_2) {
3961   SimVRegister temp;
3962   LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3963   return add(vform, dst, product, product).SignedSaturate(vform);
3964 }
3965 
3966 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3967 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3968                                    LogicVRegister dst,
3969                                    const LogicVRegister& src1,
3970                                    const LogicVRegister& src2) {
3971   return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3972 }
3973 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3974 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3975                                    LogicVRegister dst,
3976                                    const LogicVRegister& src1,
3977                                    const LogicVRegister& src2,
3978                                    bool round) {
3979   int esize = LaneSizeInBitsFromFormat(vform);
3980 
3981   SimVRegister temp_lo, temp_hi;
3982 
3983   // Compute low and high multiplication results.
3984   mul(vform, temp_lo, src1, src2);
3985   smulh(vform, temp_hi, src1, src2);
3986 
3987   // Double by shifting high half, and adding in most-significant bit of low
3988   // half.
3989   shl(vform, temp_hi, temp_hi, 1);
3990   usra(vform, temp_hi, temp_lo, esize - 1);
3991 
3992   if (round) {
3993     // Add the second (due to doubling) most-significant bit of the low half
3994     // into the result.
3995     shl(vform, temp_lo, temp_lo, 1);
3996     usra(vform, temp_hi, temp_lo, esize - 1);
3997   }
3998 
3999   SimPRegister not_sat;
4000   LogicPRegister ptemp(not_sat);
4001   dst.ClearForWrite(vform);
4002   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4003     // Saturation only occurs when src1 = src2 = minimum representable value.
4004     // Check this as a special case.
4005     ptemp.SetActive(vform, i, true);
4006     if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
4007         (src2.Int(vform, i) == MinIntFromFormat(vform))) {
4008       ptemp.SetActive(vform, i, false);
4009     }
4010     dst.SetInt(vform, i, MaxIntFromFormat(vform));
4011   }
4012 
4013   mov_merging(vform, dst, not_sat, temp_hi);
4014   return dst;
4015 }
4016 
4017 
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)4018 LogicVRegister Simulator::dot(VectorFormat vform,
4019                               LogicVRegister dst,
4020                               const LogicVRegister& src1,
4021                               const LogicVRegister& src2,
4022                               bool is_src1_signed,
4023                               bool is_src2_signed) {
4024   VectorFormat quarter_vform =
4025       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4026 
4027   dst.ClearForWrite(vform);
4028   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
4029     uint64_t result = 0;
4030     int64_t element1, element2;
4031     for (int i = 0; i < 4; i++) {
4032       int index = 4 * e + i;
4033       if (is_src1_signed) {
4034         element1 = src1.Int(quarter_vform, index);
4035       } else {
4036         element1 = src1.Uint(quarter_vform, index);
4037       }
4038       if (is_src2_signed) {
4039         element2 = src2.Int(quarter_vform, index);
4040       } else {
4041         element2 = src2.Uint(quarter_vform, index);
4042       }
4043       result += element1 * element2;
4044     }
4045     dst.SetUint(vform, e, result + dst.Uint(vform, e));
4046   }
4047   return dst;
4048 }
4049 
4050 
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4051 LogicVRegister Simulator::sdot(VectorFormat vform,
4052                                LogicVRegister dst,
4053                                const LogicVRegister& src1,
4054                                const LogicVRegister& src2) {
4055   return dot(vform, dst, src1, src2, true, true);
4056 }
4057 
4058 
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4059 LogicVRegister Simulator::udot(VectorFormat vform,
4060                                LogicVRegister dst,
4061                                const LogicVRegister& src1,
4062                                const LogicVRegister& src2) {
4063   return dot(vform, dst, src1, src2, false, false);
4064 }
4065 
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4066 LogicVRegister Simulator::usdot(VectorFormat vform,
4067                                 LogicVRegister dst,
4068                                 const LogicVRegister& src1,
4069                                 const LogicVRegister& src2) {
4070   return dot(vform, dst, src1, src2, false, true);
4071 }
4072 
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4073 LogicVRegister Simulator::cdot(VectorFormat vform,
4074                                LogicVRegister dst,
4075                                const LogicVRegister& acc,
4076                                const LogicVRegister& src1,
4077                                const LogicVRegister& src2,
4078                                int rot) {
4079   VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4080   VectorFormat quarter_vform =
4081       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4082 
4083   int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4084   int sel_b = 1 - sel_a;
4085   int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4086 
4087   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4088     int64_t result = acc.Int(vform, i);
4089     for (int j = 0; j < 2; j++) {
4090       int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4091       int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4092       int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4093       int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4094       result += (r1 * r2) + (sub_i * i1 * i2);
4095     }
4096     dst.SetInt(vform, i, result);
4097   }
4098   return dst;
4099 }
4100 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4101 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4102                                     LogicVRegister dst,
4103                                     const LogicVRegister& srca,
4104                                     const LogicVRegister& src1,
4105                                     const LogicVRegister& src2,
4106                                     int rot) {
4107   SimVRegister src1_a, src1_b;
4108   SimVRegister src2_a, src2_b;
4109   SimVRegister srca_i, srca_r;
4110   SimVRegister zero, temp;
4111   zero.Clear();
4112 
4113   if ((rot == 0) || (rot == 180)) {
4114     uzp1(vform, src1_a, src1, zero);
4115     uzp1(vform, src2_a, src2, zero);
4116     uzp2(vform, src2_b, src2, zero);
4117   } else {
4118     uzp2(vform, src1_a, src1, zero);
4119     uzp2(vform, src2_a, src2, zero);
4120     uzp1(vform, src2_b, src2, zero);
4121   }
4122 
4123   uzp1(vform, srca_r, srca, zero);
4124   uzp2(vform, srca_i, srca, zero);
4125 
4126   bool sub_r = (rot == 90) || (rot == 180);
4127   bool sub_i = (rot == 180) || (rot == 270);
4128 
4129   const bool round = true;
4130   sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4131   sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4132   zip1(vform, dst, srca_r, srca_i);
4133   return dst;
4134 }
4135 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4136 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4137                                     LogicVRegister dst,
4138                                     const LogicVRegister& srca,
4139                                     const LogicVRegister& src1,
4140                                     const LogicVRegister& src2,
4141                                     int index,
4142                                     int rot) {
4143   SimVRegister temp;
4144   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4145   return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4146 }
4147 
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4148 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4149                                       LogicVRegister dst,
4150                                       const LogicVRegister& src1,
4151                                       const LogicVRegister& src2,
4152                                       bool round,
4153                                       bool sub_op) {
4154   // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4155   // To avoid this, we use:
4156   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4157   // which is same as:
4158   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4159 
4160   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4161   int esize = kDRegSize;
4162   vixl_uint128_t round_const, accum;
4163   round_const.first = 0;
4164   if (round) {
4165     round_const.second = UINT64_C(1) << (esize - 2);
4166   } else {
4167     round_const.second = 0;
4168   }
4169 
4170   dst.ClearForWrite(vform);
4171   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4172     // Shift the whole value left by `esize - 1` bits.
4173     accum.first = dst.Int(vform, i) >> 1;
4174     accum.second = dst.Int(vform, i) << (esize - 1);
4175 
4176     vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4177 
4178     if (sub_op) {
4179       product = Neg128(product);
4180     }
4181     accum = Add128(accum, product);
4182 
4183     // Perform rounding.
4184     accum = Add128(accum, round_const);
4185 
4186     // Arithmetic shift the whole value right by `esize - 1` bits.
4187     accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4188     accum.first = UnsignedNegate(accum.first >> (esize - 1));
4189 
4190     // Perform saturation.
4191     bool is_pos = (accum.first == 0) ? true : false;
4192     if (is_pos &&
4193         (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4194       accum.second = MaxIntFromFormat(vform);
4195     } else if (!is_pos && (accum.second <
4196                            static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4197       accum.second = MinIntFromFormat(vform);
4198     }
4199 
4200     dst.SetInt(vform, i, accum.second);
4201   }
4202 
4203   return dst;
4204 }
4205 
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4206 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4207                                     LogicVRegister dst,
4208                                     const LogicVRegister& src1,
4209                                     const LogicVRegister& src2,
4210                                     bool round,
4211                                     bool sub_op) {
4212   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4213   // To avoid this, we use:
4214   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4215   // which is same as:
4216   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4217 
4218   if (vform == kFormatVnD) {
4219     return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4220   }
4221 
4222   int esize = LaneSizeInBitsFromFormat(vform);
4223   int round_const = round ? (1 << (esize - 2)) : 0;
4224   int64_t accum;
4225 
4226   dst.ClearForWrite(vform);
4227   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4228     accum = dst.Int(vform, i) << (esize - 1);
4229     if (sub_op) {
4230       accum -= src1.Int(vform, i) * src2.Int(vform, i);
4231     } else {
4232       accum += src1.Int(vform, i) * src2.Int(vform, i);
4233     }
4234     accum += round_const;
4235     accum = accum >> (esize - 1);
4236 
4237     if (accum > MaxIntFromFormat(vform)) {
4238       accum = MaxIntFromFormat(vform);
4239     } else if (accum < MinIntFromFormat(vform)) {
4240       accum = MinIntFromFormat(vform);
4241     }
4242     dst.SetInt(vform, i, accum);
4243   }
4244   return dst;
4245 }
4246 
4247 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4248 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4249                                    LogicVRegister dst,
4250                                    const LogicVRegister& src1,
4251                                    const LogicVRegister& src2,
4252                                    bool round) {
4253   return sqrdmlash(vform, dst, src1, src2, round, false);
4254 }
4255 
4256 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4257 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4258                                    LogicVRegister dst,
4259                                    const LogicVRegister& src1,
4260                                    const LogicVRegister& src2,
4261                                    bool round) {
4262   return sqrdmlash(vform, dst, src1, src2, round, true);
4263 }
4264 
4265 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4266 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4267                                   LogicVRegister dst,
4268                                   const LogicVRegister& src1,
4269                                   const LogicVRegister& src2) {
4270   return sqrdmulh(vform, dst, src1, src2, false);
4271 }
4272 
4273 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4274 LogicVRegister Simulator::addhn(VectorFormat vform,
4275                                 LogicVRegister dst,
4276                                 const LogicVRegister& src1,
4277                                 const LogicVRegister& src2) {
4278   SimVRegister temp;
4279   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4280   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4281   return dst;
4282 }
4283 
4284 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4285 LogicVRegister Simulator::addhn2(VectorFormat vform,
4286                                  LogicVRegister dst,
4287                                  const LogicVRegister& src1,
4288                                  const LogicVRegister& src2) {
4289   SimVRegister temp;
4290   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4291   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4292   return dst;
4293 }
4294 
4295 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4296 LogicVRegister Simulator::raddhn(VectorFormat vform,
4297                                  LogicVRegister dst,
4298                                  const LogicVRegister& src1,
4299                                  const LogicVRegister& src2) {
4300   SimVRegister temp;
4301   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4302   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4303   return dst;
4304 }
4305 
4306 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4307 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4308                                   LogicVRegister dst,
4309                                   const LogicVRegister& src1,
4310                                   const LogicVRegister& src2) {
4311   SimVRegister temp;
4312   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4313   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4314   return dst;
4315 }
4316 
4317 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4318 LogicVRegister Simulator::subhn(VectorFormat vform,
4319                                 LogicVRegister dst,
4320                                 const LogicVRegister& src1,
4321                                 const LogicVRegister& src2) {
4322   SimVRegister temp;
4323   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4324   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4325   return dst;
4326 }
4327 
4328 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4329 LogicVRegister Simulator::subhn2(VectorFormat vform,
4330                                  LogicVRegister dst,
4331                                  const LogicVRegister& src1,
4332                                  const LogicVRegister& src2) {
4333   SimVRegister temp;
4334   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4335   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4336   return dst;
4337 }
4338 
4339 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4340 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4341                                  LogicVRegister dst,
4342                                  const LogicVRegister& src1,
4343                                  const LogicVRegister& src2) {
4344   SimVRegister temp;
4345   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4346   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4347   return dst;
4348 }
4349 
4350 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4351 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4352                                   LogicVRegister dst,
4353                                   const LogicVRegister& src1,
4354                                   const LogicVRegister& src2) {
4355   SimVRegister temp;
4356   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4357   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4358   return dst;
4359 }
4360 
4361 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4362 LogicVRegister Simulator::trn1(VectorFormat vform,
4363                                LogicVRegister dst,
4364                                const LogicVRegister& src1,
4365                                const LogicVRegister& src2) {
4366   uint64_t result[kZRegMaxSizeInBytes] = {};
4367   int lane_count = LaneCountFromFormat(vform);
4368   int pairs = lane_count / 2;
4369   for (int i = 0; i < pairs; ++i) {
4370     result[2 * i] = src1.Uint(vform, 2 * i);
4371     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4372   }
4373 
4374   dst.ClearForWrite(vform);
4375   for (int i = 0; i < lane_count; ++i) {
4376     dst.SetUint(vform, i, result[i]);
4377   }
4378   return dst;
4379 }
4380 
4381 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4382 LogicVRegister Simulator::trn2(VectorFormat vform,
4383                                LogicVRegister dst,
4384                                const LogicVRegister& src1,
4385                                const LogicVRegister& src2) {
4386   uint64_t result[kZRegMaxSizeInBytes] = {};
4387   int lane_count = LaneCountFromFormat(vform);
4388   int pairs = lane_count / 2;
4389   for (int i = 0; i < pairs; ++i) {
4390     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4391     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4392   }
4393 
4394   dst.ClearForWrite(vform);
4395   for (int i = 0; i < lane_count; ++i) {
4396     dst.SetUint(vform, i, result[i]);
4397   }
4398   return dst;
4399 }
4400 
4401 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4402 LogicVRegister Simulator::zip1(VectorFormat vform,
4403                                LogicVRegister dst,
4404                                const LogicVRegister& src1,
4405                                const LogicVRegister& src2) {
4406   uint64_t result[kZRegMaxSizeInBytes] = {};
4407   int lane_count = LaneCountFromFormat(vform);
4408   int pairs = lane_count / 2;
4409   for (int i = 0; i < pairs; ++i) {
4410     result[2 * i] = src1.Uint(vform, i);
4411     result[(2 * i) + 1] = src2.Uint(vform, i);
4412   }
4413 
4414   dst.ClearForWrite(vform);
4415   for (int i = 0; i < lane_count; ++i) {
4416     dst.SetUint(vform, i, result[i]);
4417   }
4418   return dst;
4419 }
4420 
4421 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4422 LogicVRegister Simulator::zip2(VectorFormat vform,
4423                                LogicVRegister dst,
4424                                const LogicVRegister& src1,
4425                                const LogicVRegister& src2) {
4426   uint64_t result[kZRegMaxSizeInBytes] = {};
4427   int lane_count = LaneCountFromFormat(vform);
4428   int pairs = lane_count / 2;
4429   for (int i = 0; i < pairs; ++i) {
4430     result[2 * i] = src1.Uint(vform, pairs + i);
4431     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4432   }
4433 
4434   dst.ClearForWrite(vform);
4435   for (int i = 0; i < lane_count; ++i) {
4436     dst.SetUint(vform, i, result[i]);
4437   }
4438   return dst;
4439 }
4440 
4441 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4442 LogicVRegister Simulator::uzp1(VectorFormat vform,
4443                                LogicVRegister dst,
4444                                const LogicVRegister& src1,
4445                                const LogicVRegister& src2) {
4446   uint64_t result[kZRegMaxSizeInBytes * 2];
4447   int lane_count = LaneCountFromFormat(vform);
4448   for (int i = 0; i < lane_count; ++i) {
4449     result[i] = src1.Uint(vform, i);
4450     result[lane_count + i] = src2.Uint(vform, i);
4451   }
4452 
4453   dst.ClearForWrite(vform);
4454   for (int i = 0; i < lane_count; ++i) {
4455     dst.SetUint(vform, i, result[2 * i]);
4456   }
4457   return dst;
4458 }
4459 
4460 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4461 LogicVRegister Simulator::uzp2(VectorFormat vform,
4462                                LogicVRegister dst,
4463                                const LogicVRegister& src1,
4464                                const LogicVRegister& src2) {
4465   uint64_t result[kZRegMaxSizeInBytes * 2];
4466   int lane_count = LaneCountFromFormat(vform);
4467   for (int i = 0; i < lane_count; ++i) {
4468     result[i] = src1.Uint(vform, i);
4469     result[lane_count + i] = src2.Uint(vform, i);
4470   }
4471 
4472   dst.ClearForWrite(vform);
4473   for (int i = 0; i < lane_count; ++i) {
4474     dst.SetUint(vform, i, result[(2 * i) + 1]);
4475   }
4476   return dst;
4477 }
4478 
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4479 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4480                                                 LogicVRegister dst,
4481                                                 const LogicVRegister& src) {
4482   // Interleave the top and bottom half of a vector, ie. for a vector:
4483   //
4484   //   [ ... | F | D | B | ... | E | C | A ]
4485   //
4486   // where B is the first element in the top half of the vector, produce a
4487   // result vector:
4488   //
4489   //   [ ... | ... | F | E | D | C | B | A ]
4490 
4491   uint64_t result[kZRegMaxSizeInBytes] = {};
4492   int lane_count = LaneCountFromFormat(vform);
4493   for (int i = 0; i < lane_count; i += 2) {
4494     result[i] = src.Uint(vform, i / 2);
4495     result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4496   }
4497   dst.SetUintArray(vform, result);
4498   return dst;
4499 }
4500 
4501 template <typename T>
FPNeg(T op)4502 T Simulator::FPNeg(T op) {
4503   return -op;
4504 }
4505 
4506 template <typename T>
FPAdd(T op1,T op2)4507 T Simulator::FPAdd(T op1, T op2) {
4508   T result = FPProcessNaNs(op1, op2);
4509   if (IsNaN(result)) {
4510     return result;
4511   }
4512 
4513   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4514     // inf + -inf returns the default NaN.
4515     FPProcessException();
4516     return FPDefaultNaN<T>();
4517   } else {
4518     // Other cases should be handled by standard arithmetic.
4519     return op1 + op2;
4520   }
4521 }
4522 
4523 
4524 template <typename T>
FPSub(T op1,T op2)4525 T Simulator::FPSub(T op1, T op2) {
4526   // NaNs should be handled elsewhere.
4527   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4528 
4529   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4530     // inf - inf returns the default NaN.
4531     FPProcessException();
4532     return FPDefaultNaN<T>();
4533   } else {
4534     // Other cases should be handled by standard arithmetic.
4535     return op1 - op2;
4536   }
4537 }
4538 
4539 template <typename T>
FPMulNaNs(T op1,T op2)4540 T Simulator::FPMulNaNs(T op1, T op2) {
4541   T result = FPProcessNaNs(op1, op2);
4542   return IsNaN(result) ? result : FPMul(op1, op2);
4543 }
4544 
4545 template <typename T>
FPMul(T op1,T op2)4546 T Simulator::FPMul(T op1, T op2) {
4547   // NaNs should be handled elsewhere.
4548   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4549 
4550   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4551     // inf * 0.0 returns the default NaN.
4552     FPProcessException();
4553     return FPDefaultNaN<T>();
4554   } else {
4555     // Other cases should be handled by standard arithmetic.
4556     return op1 * op2;
4557   }
4558 }
4559 
4560 
4561 template <typename T>
FPMulx(T op1,T op2)4562 T Simulator::FPMulx(T op1, T op2) {
4563   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4564     // inf * 0.0 returns +/-2.0.
4565     T two = 2.0;
4566     return copysign(T(1.0), op1) * copysign(T(1.0), op2) * two;
4567   }
4568   return FPMul(op1, op2);
4569 }
4570 
4571 
4572 template <typename T>
FPMulAdd(T a,T op1,T op2)4573 T Simulator::FPMulAdd(T a, T op1, T op2) {
4574   T result = FPProcessNaNs3(a, op1, op2);
4575 
4576   T sign_a = copysign(T(1.0), a);
4577   T sign_prod = copysign(T(1.0), op1) * copysign(T(1.0), op2);
4578   bool isinf_prod = IsInf(op1) || IsInf(op2);
4579   bool operation_generates_nan =
4580       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4581       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4582       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4583 
4584   if (IsNaN(result)) {
4585     // Generated NaNs override quiet NaNs propagated from a.
4586     if (operation_generates_nan && IsQuietNaN(a)) {
4587       FPProcessException();
4588       return FPDefaultNaN<T>();
4589     } else {
4590       return result;
4591     }
4592   }
4593 
4594   // If the operation would produce a NaN, return the default NaN.
4595   if (operation_generates_nan) {
4596     FPProcessException();
4597     return FPDefaultNaN<T>();
4598   }
4599 
4600   // Work around broken fma implementations for exact zero results: The sign of
4601   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4602   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4603     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? T(-0.0) : T(0.0);
4604   }
4605 
4606   result = FusedMultiplyAdd(op1, op2, a);
4607   VIXL_ASSERT(!IsNaN(result));
4608 
4609   // Work around broken fma implementations for rounded zero results: If a is
4610   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4611   if ((a == 0.0) && (result == 0.0)) {
4612     return copysign(T(0.0), sign_prod);
4613   }
4614 
4615   return result;
4616 }
4617 
4618 
4619 template <typename T>
FPDiv(T op1,T op2)4620 T Simulator::FPDiv(T op1, T op2) {
4621   // NaNs should be handled elsewhere.
4622   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4623 
4624   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4625     // inf / inf and 0.0 / 0.0 return the default NaN.
4626     FPProcessException();
4627     return FPDefaultNaN<T>();
4628   } else {
4629     if (op2 == 0.0) {
4630       FPProcessException();
4631       if (!IsNaN(op1)) {
4632         double op1_sign = copysign(1.0, op1);
4633         double op2_sign = copysign(1.0, op2);
4634         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4635       }
4636     }
4637 
4638     // Other cases should be handled by standard arithmetic.
4639     return op1 / op2;
4640   }
4641 }
4642 
4643 
4644 template <typename T>
FPSqrt(T op)4645 T Simulator::FPSqrt(T op) {
4646   if (IsNaN(op)) {
4647     return FPProcessNaN(op);
4648   } else if (op < T(0.0)) {
4649     FPProcessException();
4650     return FPDefaultNaN<T>();
4651   } else {
4652     return sqrt(op);
4653   }
4654 }
4655 
4656 
4657 template <typename T>
FPMax(T a,T b)4658 T Simulator::FPMax(T a, T b) {
4659   T result = FPProcessNaNs(a, b);
4660   if (IsNaN(result)) return result;
4661 
4662   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4663     // a and b are zero, and the sign differs: return +0.0.
4664     return 0.0;
4665   } else {
4666     return (a > b) ? a : b;
4667   }
4668 }
4669 
4670 
4671 template <typename T>
FPMaxNM(T a,T b)4672 T Simulator::FPMaxNM(T a, T b) {
4673   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4674     a = T(kFP64NegativeInfinity);
4675   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4676     b = T(kFP64NegativeInfinity);
4677   }
4678 
4679   T result = FPProcessNaNs(a, b);
4680   return IsNaN(result) ? result : FPMax(a, b);
4681 }
4682 
4683 
4684 template <typename T>
FPMin(T a,T b)4685 T Simulator::FPMin(T a, T b) {
4686   T result = FPProcessNaNs(a, b);
4687   if (IsNaN(result)) return result;
4688 
4689   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4690     // a and b are zero, and the sign differs: return -0.0.
4691     return -0.0;
4692   } else {
4693     return (a < b) ? a : b;
4694   }
4695 }
4696 
4697 
4698 template <typename T>
FPMinNM(T a,T b)4699 T Simulator::FPMinNM(T a, T b) {
4700   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4701     a = T(kFP64PositiveInfinity);
4702   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4703     b = T(kFP64PositiveInfinity);
4704   }
4705 
4706   T result = FPProcessNaNs(a, b);
4707   return IsNaN(result) ? result : FPMin(a, b);
4708 }
4709 
4710 
4711 template <typename T>
FPRecipStepFused(T op1,T op2)4712 T Simulator::FPRecipStepFused(T op1, T op2) {
4713   const T two = 2.0;
4714   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4715     return two;
4716   } else if (IsInf(op1) || IsInf(op2)) {
4717     // Return +inf if signs match, otherwise -inf.
4718     return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
4719                                           : T(kFP64NegativeInfinity);
4720   } else {
4721     return FusedMultiplyAdd(op1, op2, two);
4722   }
4723 }
4724 
// True when `value` is a normal floating-point number, i.e. it is not zero,
// subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4729 
4730 template <>
IsNormal(SimFloat16 value)4731 bool IsNormal(SimFloat16 value) {
4732   uint16_t rawbits = Float16ToRawbits(value);
4733   uint16_t exp_mask = 0x7c00;
4734   // Check that the exponent is neither all zeroes or all ones.
4735   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4736 }
4737 
4738 
4739 template <typename T>
FPRSqrtStepFused(T op1,T op2)4740 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4741   const T one_point_five = 1.5;
4742   const T two = 2.0;
4743 
4744   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4745     return one_point_five;
4746   } else if (IsInf(op1) || IsInf(op2)) {
4747     // Return +inf if signs match, otherwise -inf.
4748     return ((op1 >= 0.0) == (op2 >= 0.0)) ? T(kFP64PositiveInfinity)
4749                                           : T(kFP64NegativeInfinity);
4750   } else {
4751     // The multiply-add-halve operation must be fully fused, so avoid interim
4752     // rounding by checking which operand can be losslessly divided by two
4753     // before doing the multiply-add.
4754     if (IsNormal(op1 / two)) {
4755       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4756     } else if (IsNormal(op2 / two)) {
4757       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4758     } else {
4759       // Neither operand is normal after halving: the result is dominated by
4760       // the addition term, so just return that.
4761       return one_point_five;
4762     }
4763   }
4764 }
4765 
FPToFixedJS(double value)4766 int32_t Simulator::FPToFixedJS(double value) {
4767   // The Z-flag is set when the conversion from double precision floating-point
4768   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4769   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4770   // Z-flag is unset.
4771   int Z = 1;
4772   int32_t result;
4773 
4774   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4775       (value == kFP64NegativeInfinity)) {
4776     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4777     // unset the Z-flag.
4778     result = 0;
4779     if ((value != 0.0) || std::signbit(value)) {
4780       Z = 0;
4781     }
4782   } else if (std::isnan(value)) {
4783     // NaN values unset the Z-flag and set the result to 0.
4784     FPProcessNaN(value);
4785     result = 0;
4786     Z = 0;
4787   } else {
4788     // All other values are converted to an integer representation, rounded
4789     // toward zero.
4790     double int_result = std::floor(value);
4791     double error = value - int_result;
4792 
4793     if ((error != 0.0) && (int_result < 0.0)) {
4794       int_result++;
4795     }
4796 
4797     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4798     // write a one-liner with std::round, but the behaviour on ties is incorrect
4799     // for our purposes.
4800     double mod_const = static_cast<double>(UINT64_C(1) << 32);
4801     double mod_error =
4802         (int_result / mod_const) - std::floor(int_result / mod_const);
4803     double constrained;
4804     if (mod_error == 0.5) {
4805       constrained = INT32_MIN;
4806     } else {
4807       constrained = int_result - mod_const * round(int_result / mod_const);
4808     }
4809 
4810     VIXL_ASSERT(std::floor(constrained) == constrained);
4811     VIXL_ASSERT(constrained >= INT32_MIN);
4812     VIXL_ASSERT(constrained <= INT32_MAX);
4813 
4814     // Take the bottom 32 bits of the result as a 32-bit integer.
4815     result = static_cast<int32_t>(constrained);
4816 
4817     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4818         (error != 0.0)) {
4819       // If the integer result is out of range or the conversion isn't exact,
4820       // take exception and unset the Z-flag.
4821       FPProcessException();
4822       Z = 0;
4823     }
4824   }
4825 
4826   ReadNzcv().SetN(0);
4827   ReadNzcv().SetZ(Z);
4828   ReadNzcv().SetC(0);
4829   ReadNzcv().SetV(0);
4830 
4831   return result;
4832 }
4833 
FPRoundIntCommon(double value,FPRounding round_mode)4834 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4835   VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4836               (value != kFP64NegativeInfinity));
4837   VIXL_ASSERT(!IsNaN(value));
4838 
4839   double int_result = std::floor(value);
4840   double error = value - int_result;
4841   switch (round_mode) {
4842     case FPTieAway: {
4843       // Take care of correctly handling the range ]-0.5, -0.0], which must
4844       // yield -0.0.
4845       if ((-0.5 < value) && (value < 0.0)) {
4846         int_result = -0.0;
4847 
4848       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4849         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4850         // result is positive, round up.
4851         int_result++;
4852       }
4853       break;
4854     }
4855     case FPTieEven: {
4856       // Take care of correctly handling the range [-0.5, -0.0], which must
4857       // yield -0.0.
4858       if ((-0.5 <= value) && (value < 0.0)) {
4859         int_result = -0.0;
4860 
4861         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4862         // result is odd, round up.
4863       } else if ((error > 0.5) ||
4864                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4865         int_result++;
4866       }
4867       break;
4868     }
4869     case FPZero: {
4870       // If value>0 then we take floor(value)
4871       // otherwise, ceil(value).
4872       if (value < 0) {
4873         int_result = ceil(value);
4874       }
4875       break;
4876     }
4877     case FPNegativeInfinity: {
4878       // We always use floor(value).
4879       break;
4880     }
4881     case FPPositiveInfinity: {
4882       // Take care of correctly handling the range ]-1.0, -0.0], which must
4883       // yield -0.0.
4884       if ((-1.0 < value) && (value < 0.0)) {
4885         int_result = -0.0;
4886 
4887         // If the error is non-zero, round up.
4888       } else if (error > 0.0) {
4889         int_result++;
4890       }
4891       break;
4892     }
4893     default:
4894       VIXL_UNIMPLEMENTED();
4895   }
4896   return int_result;
4897 }
4898 
FPRoundInt(double value,FPRounding round_mode)4899 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4900   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4901       (value == kFP64NegativeInfinity)) {
4902     return value;
4903   } else if (IsNaN(value)) {
4904     return FPProcessNaN(value);
4905   }
4906   return FPRoundIntCommon(value, round_mode);
4907 }
4908 
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4909 double Simulator::FPRoundInt(double value,
4910                              FPRounding round_mode,
4911                              FrintMode frint_mode) {
4912   if (frint_mode == kFrintToInteger) {
4913     return FPRoundInt(value, round_mode);
4914   }
4915 
4916   VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4917 
4918   if (value == 0.0) {
4919     return value;
4920   }
4921 
4922   if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4923       IsNaN(value)) {
4924     if (frint_mode == kFrintToInt32) {
4925       return INT32_MIN;
4926     } else {
4927       return INT64_MIN;
4928     }
4929   }
4930 
4931   double result = FPRoundIntCommon(value, round_mode);
4932 
4933   // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4934   // representable as a double, and is rounded to (INT64_MAX + 1) when
4935   // converted. To avoid this, we compare `result >= int64_max_plus_one`
4936   // instead; this is safe because `result` is known to be integral, and
4937   // `int64_max_plus_one` is exactly representable as a double.
4938   constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4939   VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4940                          int64_max_plus_one)) == int64_max_plus_one);
4941 
4942   if (frint_mode == kFrintToInt32) {
4943     if ((result > INT32_MAX) || (result < INT32_MIN)) {
4944       return INT32_MIN;
4945     }
4946   } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4947     return INT64_MIN;
4948   }
4949 
4950   return result;
4951 }
4952 
FPToInt16(double value,FPRounding rmode)4953 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4954   value = FPRoundInt(value, rmode);
4955   if (value >= kHMaxInt) {
4956     return kHMaxInt;
4957   } else if (value < kHMinInt) {
4958     return kHMinInt;
4959   }
4960   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4961 }
4962 
4963 
FPToInt32(double value,FPRounding rmode)4964 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4965   value = FPRoundInt(value, rmode);
4966   if (value >= kWMaxInt) {
4967     return kWMaxInt;
4968   } else if (value < kWMinInt) {
4969     return kWMinInt;
4970   }
4971   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4972 }
4973 
4974 
FPToInt64(double value,FPRounding rmode)4975 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4976   value = FPRoundInt(value, rmode);
4977   // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4978   // as a result of kMaxInt not being representable as a double.
4979   if (value >= 9223372036854775808.) {
4980     return kXMaxInt;
4981   } else if (value < kXMinInt) {
4982     return kXMinInt;
4983   }
4984   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4985 }
4986 
4987 
FPToUInt16(double value,FPRounding rmode)4988 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4989   value = FPRoundInt(value, rmode);
4990   if (value >= kHMaxUInt) {
4991     return kHMaxUInt;
4992   } else if (value < 0.0) {
4993     return 0;
4994   }
4995   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4996 }
4997 
4998 
FPToUInt32(double value,FPRounding rmode)4999 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
5000   value = FPRoundInt(value, rmode);
5001   if (value >= kWMaxUInt) {
5002     return kWMaxUInt;
5003   } else if (value < 0.0) {
5004     return 0;
5005   }
5006   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
5007 }
5008 
5009 
FPToUInt64(double value,FPRounding rmode)5010 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
5011   value = FPRoundInt(value, rmode);
5012   // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
5013   // as a result of kMaxUInt not being representable as a double.
5014   if (value >= 18446744073709551616.) {
5015     return kXMaxUInt;
5016   } else if (value < 0.0) {
5017     return 0;
5018   }
5019   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
5020 }
5021 
5022 
// Generates, for each entry of NEON_FP3SAME_LIST, a lane-wise two-operand
// floating-point vector operation FN:
//  - a template worker that reads each pair of lanes as T and stores OP(op1,
//    op2), and
//  - a non-template entry point that selects T (SimFloat16, float or double)
//    from the lane size of `vform`.
// When PROCNAN is true the operands first go through FPProcessNaNs, and a NaN
// coming out of that step is stored directly instead of calling OP.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                  \
  template <typename T>                                            \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    dst.ClearForWrite(vform);                                      \
    const int lane_count = LaneCountFromFormat(vform);             \
    for (int lane = 0; lane < lane_count; ++lane) {                \
      T lhs = src1.Float<T>(lane);                                 \
      T rhs = src2.Float<T>(lane);                                 \
      T value;                                                     \
      bool nan_propagated = false;                                 \
      if (PROCNAN) {                                               \
        value = FPProcessNaNs(lhs, rhs);                           \
        nan_propagated = IsNaN(value);                             \
      }                                                            \
      if (!nan_propagated) {                                       \
        value = OP(lhs, rhs);                                      \
      }                                                            \
      dst.SetFloat(vform, lane, value);                            \
    }                                                              \
    return dst;                                                    \
  }                                                                \
                                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    const auto lane_bits = LaneSizeInBitsFromFormat(vform);        \
    if (lane_bits == kHRegSize) {                                  \
      FN<SimFloat16>(vform, dst, src1, src2);                      \
      return dst;                                                  \
    }                                                              \
    if (lane_bits == kSRegSize) {                                  \
      FN<float>(vform, dst, src1, src2);                           \
      return dst;                                                  \
    }                                                              \
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);     \
    FN<double>(vform, dst, src1, src2);                            \
    return dst;                                                    \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5063 
5064 
5065 LogicVRegister Simulator::fnmul(VectorFormat vform,
5066                                 LogicVRegister dst,
5067                                 const LogicVRegister& src1,
5068                                 const LogicVRegister& src2) {
5069   SimVRegister temp;
5070   LogicVRegister product = fmul(vform, temp, src1, src2);
5071   return fneg(vform, dst, product);
5072 }
5073 
5074 
5075 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5076 LogicVRegister Simulator::frecps(VectorFormat vform,
5077                                  LogicVRegister dst,
5078                                  const LogicVRegister& src1,
5079                                  const LogicVRegister& src2) {
5080   dst.ClearForWrite(vform);
5081   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5082     T op1 = -src1.Float<T>(i);
5083     T op2 = src2.Float<T>(i);
5084     T result = FPProcessNaNs(op1, op2);
5085     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5086   }
5087   return dst;
5088 }
5089 
5090 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5091 LogicVRegister Simulator::frecps(VectorFormat vform,
5092                                  LogicVRegister dst,
5093                                  const LogicVRegister& src1,
5094                                  const LogicVRegister& src2) {
5095   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5096     frecps<SimFloat16>(vform, dst, src1, src2);
5097   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5098     frecps<float>(vform, dst, src1, src2);
5099   } else {
5100     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5101     frecps<double>(vform, dst, src1, src2);
5102   }
5103   return dst;
5104 }
5105 
5106 
5107 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5108 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5109                                   LogicVRegister dst,
5110                                   const LogicVRegister& src1,
5111                                   const LogicVRegister& src2) {
5112   dst.ClearForWrite(vform);
5113   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5114     T op1 = -src1.Float<T>(i);
5115     T op2 = src2.Float<T>(i);
5116     T result = FPProcessNaNs(op1, op2);
5117     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5118   }
5119   return dst;
5120 }
5121 
5122 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5123 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5124                                   LogicVRegister dst,
5125                                   const LogicVRegister& src1,
5126                                   const LogicVRegister& src2) {
5127   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5128     frsqrts<SimFloat16>(vform, dst, src1, src2);
5129   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5130     frsqrts<float>(vform, dst, src1, src2);
5131   } else {
5132     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5133     frsqrts<double>(vform, dst, src1, src2);
5134   }
5135   return dst;
5136 }
5137 
5138 
5139 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5140 LogicVRegister Simulator::fcmp(VectorFormat vform,
5141                                LogicVRegister dst,
5142                                const LogicVRegister& src1,
5143                                const LogicVRegister& src2,
5144                                Condition cond) {
5145   dst.ClearForWrite(vform);
5146   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5147     bool result = false;
5148     T op1 = src1.Float<T>(i);
5149     T op2 = src2.Float<T>(i);
5150     bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5151 
5152     switch (cond) {
5153       case eq:
5154         result = (op1 == op2);
5155         break;
5156       case ge:
5157         result = (op1 >= op2);
5158         break;
5159       case gt:
5160         result = (op1 > op2);
5161         break;
5162       case le:
5163         result = (op1 <= op2);
5164         break;
5165       case lt:
5166         result = (op1 < op2);
5167         break;
5168       case ne:
5169         result = (op1 != op2);
5170         break;
5171       case uo:
5172         result = unordered;
5173         break;
5174       default:
5175         // Other conditions are defined in terms of those above.
5176         VIXL_UNREACHABLE();
5177         break;
5178     }
5179 
5180     if (result && unordered) {
5181       // Only `uo` and `ne` can be true for unordered comparisons.
5182       VIXL_ASSERT((cond == uo) || (cond == ne));
5183     }
5184 
5185     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5186   }
5187   return dst;
5188 }
5189 
5190 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5191 LogicVRegister Simulator::fcmp(VectorFormat vform,
5192                                LogicVRegister dst,
5193                                const LogicVRegister& src1,
5194                                const LogicVRegister& src2,
5195                                Condition cond) {
5196   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5197     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5198   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5199     fcmp<float>(vform, dst, src1, src2, cond);
5200   } else {
5201     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5202     fcmp<double>(vform, dst, src1, src2, cond);
5203   }
5204   return dst;
5205 }
5206 
5207 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5208 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5209                                     LogicVRegister dst,
5210                                     const LogicVRegister& src,
5211                                     Condition cond) {
5212   SimVRegister temp;
5213   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5214     LogicVRegister zero_reg =
5215         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5216     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5217   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5218     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5219     fcmp<float>(vform, dst, src, zero_reg, cond);
5220   } else {
5221     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5222     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5223     fcmp<double>(vform, dst, src, zero_reg, cond);
5224   }
5225   return dst;
5226 }
5227 
5228 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5229 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5230                                   LogicVRegister dst,
5231                                   const LogicVRegister& src1,
5232                                   const LogicVRegister& src2,
5233                                   Condition cond) {
5234   SimVRegister temp1, temp2;
5235   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5236     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5237     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5238     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5239   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5240     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5241     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5242     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5243   } else {
5244     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5245     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5246     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5247     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5248   }
5249   return dst;
5250 }
5251 
5252 
5253 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5254 LogicVRegister Simulator::fmla(VectorFormat vform,
5255                                LogicVRegister dst,
5256                                const LogicVRegister& srca,
5257                                const LogicVRegister& src1,
5258                                const LogicVRegister& src2) {
5259   dst.ClearForWrite(vform);
5260   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5261     T op1 = src1.Float<T>(i);
5262     T op2 = src2.Float<T>(i);
5263     T acc = srca.Float<T>(i);
5264     T result = FPMulAdd(acc, op1, op2);
5265     dst.SetFloat(vform, i, result);
5266   }
5267   return dst;
5268 }
5269 
5270 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5271 LogicVRegister Simulator::fmla(VectorFormat vform,
5272                                LogicVRegister dst,
5273                                const LogicVRegister& srca,
5274                                const LogicVRegister& src1,
5275                                const LogicVRegister& src2) {
5276   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5277     fmla<SimFloat16>(vform, dst, srca, src1, src2);
5278   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5279     fmla<float>(vform, dst, srca, src1, src2);
5280   } else {
5281     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5282     fmla<double>(vform, dst, srca, src1, src2);
5283   }
5284   return dst;
5285 }
5286 
5287 
5288 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5289 LogicVRegister Simulator::fmls(VectorFormat vform,
5290                                LogicVRegister dst,
5291                                const LogicVRegister& srca,
5292                                const LogicVRegister& src1,
5293                                const LogicVRegister& src2) {
5294   dst.ClearForWrite(vform);
5295   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5296     T op1 = -src1.Float<T>(i);
5297     T op2 = src2.Float<T>(i);
5298     T acc = srca.Float<T>(i);
5299     T result = FPMulAdd(acc, op1, op2);
5300     dst.SetFloat(i, result);
5301   }
5302   return dst;
5303 }
5304 
5305 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5306 LogicVRegister Simulator::fmls(VectorFormat vform,
5307                                LogicVRegister dst,
5308                                const LogicVRegister& srca,
5309                                const LogicVRegister& src1,
5310                                const LogicVRegister& src2) {
5311   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5312     fmls<SimFloat16>(vform, dst, srca, src1, src2);
5313   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5314     fmls<float>(vform, dst, srca, src1, src2);
5315   } else {
5316     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5317     fmls<double>(vform, dst, srca, src1, src2);
5318   }
5319   return dst;
5320 }
5321 
5322 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5323 LogicVRegister Simulator::fmlal(VectorFormat vform,
5324                                 LogicVRegister dst,
5325                                 const LogicVRegister& src1,
5326                                 const LogicVRegister& src2) {
5327   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5328   dst.ClearForWrite(vform);
5329   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5330     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5331     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5332     float acc = dst.Float<float>(i);
5333     float result = FPMulAdd(acc, op1, op2);
5334     dst.SetFloat(i, result);
5335   }
5336   return dst;
5337 }
5338 
5339 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5340 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5341                                  LogicVRegister dst,
5342                                  const LogicVRegister& src1,
5343                                  const LogicVRegister& src2) {
5344   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5345   dst.ClearForWrite(vform);
5346   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5347     int src = i + LaneCountFromFormat(vform);
5348     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5349     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5350     float acc = dst.Float<float>(i);
5351     float result = FPMulAdd(acc, op1, op2);
5352     dst.SetFloat(i, result);
5353   }
5354   return dst;
5355 }
5356 
5357 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5358 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5359                                 LogicVRegister dst,
5360                                 const LogicVRegister& src1,
5361                                 const LogicVRegister& src2) {
5362   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5363   dst.ClearForWrite(vform);
5364   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5365     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5366     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5367     float acc = dst.Float<float>(i);
5368     float result = FPMulAdd(acc, op1, op2);
5369     dst.SetFloat(i, result);
5370   }
5371   return dst;
5372 }
5373 
5374 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5375 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5376                                  LogicVRegister dst,
5377                                  const LogicVRegister& src1,
5378                                  const LogicVRegister& src2) {
5379   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5380   dst.ClearForWrite(vform);
5381   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5382     int src = i + LaneCountFromFormat(vform);
5383     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5384     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5385     float acc = dst.Float<float>(i);
5386     float result = FPMulAdd(acc, op1, op2);
5387     dst.SetFloat(i, result);
5388   }
5389   return dst;
5390 }
5391 
5392 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5393 LogicVRegister Simulator::fmlal(VectorFormat vform,
5394                                 LogicVRegister dst,
5395                                 const LogicVRegister& src1,
5396                                 const LogicVRegister& src2,
5397                                 int index) {
5398   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5399   dst.ClearForWrite(vform);
5400   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5401   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5402     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5403     float acc = dst.Float<float>(i);
5404     float result = FPMulAdd(acc, op1, op2);
5405     dst.SetFloat(i, result);
5406   }
5407   return dst;
5408 }
5409 
5410 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5411 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5412                                  LogicVRegister dst,
5413                                  const LogicVRegister& src1,
5414                                  const LogicVRegister& src2,
5415                                  int index) {
5416   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5417   dst.ClearForWrite(vform);
5418   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5419   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5420     int src = i + LaneCountFromFormat(vform);
5421     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5422     float acc = dst.Float<float>(i);
5423     float result = FPMulAdd(acc, op1, op2);
5424     dst.SetFloat(i, result);
5425   }
5426   return dst;
5427 }
5428 
5429 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5430 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5431                                 LogicVRegister dst,
5432                                 const LogicVRegister& src1,
5433                                 const LogicVRegister& src2,
5434                                 int index) {
5435   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5436   dst.ClearForWrite(vform);
5437   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5438   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5439     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5440     float acc = dst.Float<float>(i);
5441     float result = FPMulAdd(acc, op1, op2);
5442     dst.SetFloat(i, result);
5443   }
5444   return dst;
5445 }
5446 
5447 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5448 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5449                                  LogicVRegister dst,
5450                                  const LogicVRegister& src1,
5451                                  const LogicVRegister& src2,
5452                                  int index) {
5453   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5454   dst.ClearForWrite(vform);
5455   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5457     int src = i + LaneCountFromFormat(vform);
5458     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5459     float acc = dst.Float<float>(i);
5460     float result = FPMulAdd(acc, op1, op2);
5461     dst.SetFloat(i, result);
5462   }
5463   return dst;
5464 }
5465 
5466 
5467 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5468 LogicVRegister Simulator::fneg(VectorFormat vform,
5469                                LogicVRegister dst,
5470                                const LogicVRegister& src) {
5471   dst.ClearForWrite(vform);
5472   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5473     T op = src.Float<T>(i);
5474     op = -op;
5475     dst.SetFloat(i, op);
5476   }
5477   return dst;
5478 }
5479 
5480 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5481 LogicVRegister Simulator::fneg(VectorFormat vform,
5482                                LogicVRegister dst,
5483                                const LogicVRegister& src) {
5484   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5485     fneg<SimFloat16>(vform, dst, src);
5486   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5487     fneg<float>(vform, dst, src);
5488   } else {
5489     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5490     fneg<double>(vform, dst, src);
5491   }
5492   return dst;
5493 }
5494 
5495 
5496 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5497 LogicVRegister Simulator::fabs_(VectorFormat vform,
5498                                 LogicVRegister dst,
5499                                 const LogicVRegister& src) {
5500   dst.ClearForWrite(vform);
5501   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5502     T op = src.Float<T>(i);
5503     if (copysign(1.0, op) < 0.0) {
5504       op = -op;
5505     }
5506     dst.SetFloat(i, op);
5507   }
5508   return dst;
5509 }
5510 
5511 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5512 LogicVRegister Simulator::fabs_(VectorFormat vform,
5513                                 LogicVRegister dst,
5514                                 const LogicVRegister& src) {
5515   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5516     fabs_<SimFloat16>(vform, dst, src);
5517   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5518     fabs_<float>(vform, dst, src);
5519   } else {
5520     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5521     fabs_<double>(vform, dst, src);
5522   }
5523   return dst;
5524 }
5525 
5526 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5527 LogicVRegister Simulator::fabd(VectorFormat vform,
5528                                LogicVRegister dst,
5529                                const LogicVRegister& src1,
5530                                const LogicVRegister& src2) {
5531   SimVRegister temp;
5532   fsub(vform, temp, src1, src2);
5533   fabs_(vform, dst, temp);
5534   return dst;
5535 }
5536 
5537 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5538 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5539                                 LogicVRegister dst,
5540                                 const LogicVRegister& src) {
5541   dst.ClearForWrite(vform);
5542   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5543     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5544       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5545       dst.SetFloat(i, result);
5546     }
5547   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5548     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5549       float result = FPSqrt(src.Float<float>(i));
5550       dst.SetFloat(i, result);
5551     }
5552   } else {
5553     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5554     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5555       double result = FPSqrt(src.Float<double>(i));
5556       dst.SetFloat(i, result);
5557     }
5558   }
5559   return dst;
5560 }
5561 
5562 
// Defines, for every entry of NEON_FPPAIRWISE_LIST, the two overloads of a
// pairwise floating-point operation FNP:
//
//  - FNP(vform, dst, src1, src2): runs uzp1 and uzp2 on the two sources into
//    temporaries and applies the lane-wise operation FN to those temporaries.
//    For SVE formats the result is additionally passed through
//    interleave_top_bottom.
//  - FNP(vform, dst, src): scalar form. Applies OP to lanes 0 and 1 of `src`,
//    writes the result to lane 0 of `dst`, then calls ClearForWrite on `dst`.
//
// Only block comments may appear inside the macro body: a line comment would
// absorb the continuation lines that follow it.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                 \
  LogicVRegister Simulator::FNP(VectorFormat vform,                         \
                                LogicVRegister dst,                         \
                                const LogicVRegister& src1,                 \
                                const LogicVRegister& src2) {               \
    SimVRegister uzp1_out, uzp2_out;                                        \
    uzp1(vform, uzp1_out, src1, src2);                                      \
    uzp2(vform, uzp2_out, src1, src2);                                      \
    FN(vform, dst, uzp1_out, uzp2_out);                                     \
    if (IsSVEFormat(vform)) {                                               \
      interleave_top_bottom(vform, dst, dst);                               \
    }                                                                       \
    return dst;                                                             \
  }                                                                         \
                                                                            \
  LogicVRegister Simulator::FNP(VectorFormat vform,                         \
                                LogicVRegister dst,                         \
                                const LogicVRegister& src) {                \
    switch (vform) {                                                        \
      case kFormatH: {                                                      \
        SimFloat16 first = SimFloat16(                                      \
            RawbitsToFloat16(static_cast<uint16_t>(src.Uint(vform, 0))));   \
        SimFloat16 second = SimFloat16(                                     \
            RawbitsToFloat16(static_cast<uint16_t>(src.Uint(vform, 1))));   \
        SimFloat16 result(OP(first, second));                               \
        dst.SetUint(vform, 0, Float16ToRawbits(result));                    \
        break;                                                              \
      }                                                                     \
      case kFormatS: {                                                      \
        float result = OP(src.Float<float>(0), src.Float<float>(1));        \
        dst.SetFloat(0, result);                                            \
        break;                                                              \
      }                                                                     \
      default: {                                                            \
        VIXL_ASSERT(vform == kFormatD);                                     \
        double result = OP(src.Float<double>(0), src.Float<double>(1));     \
        dst.SetFloat(0, result);                                            \
        break;                                                              \
      }                                                                     \
    }                                                                       \
    dst.ClearForWrite(vform);                                               \
    return dst;                                                             \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5600 
5601 template <typename T>
5602 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5603                                                LogicVRegister dst,
5604                                                const LogicVRegister& src,
5605                                                typename TFPPairOp<T>::type fn,
5606                                                uint64_t inactive_value) {
5607   int lane_count = LaneCountFromFormat(vform);
5608   T result[kZRegMaxSizeInBytes / sizeof(T)];
5609   // Copy the source vector into a working array. Initialise the unused elements
5610   // at the end of the array to the same value that a false predicate would set.
5611   for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5612     result[i] = (i < lane_count)
5613                     ? src.Float<T>(i)
5614                     : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5615   }
5616 
5617   // Pairwise reduce the elements to a single value, using the pair op function
5618   // argument.
5619   for (int step = 1; step < lane_count; step *= 2) {
5620     for (int i = 0; i < lane_count; i += step * 2) {
5621       result[i] = (this->*fn)(result[i], result[i + step]);
5622     }
5623   }
5624   dst.ClearForWrite(ScalarFormatFromFormat(vform));
5625   dst.SetFloat<T>(0, result[0]);
5626   return dst;
5627 }
5628 
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5629 LogicVRegister Simulator::FPPairedAcrossHelper(
5630     VectorFormat vform,
5631     LogicVRegister dst,
5632     const LogicVRegister& src,
5633     typename TFPPairOp<SimFloat16>::type fn16,
5634     typename TFPPairOp<float>::type fn32,
5635     typename TFPPairOp<double>::type fn64,
5636     uint64_t inactive_value) {
5637   switch (LaneSizeInBitsFromFormat(vform)) {
5638     case kHRegSize:
5639       return FPPairedAcrossHelper<SimFloat16>(vform,
5640                                               dst,
5641                                               src,
5642                                               fn16,
5643                                               inactive_value);
5644     case kSRegSize:
5645       return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5646     default:
5647       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5648       return FPPairedAcrossHelper<double>(vform,
5649                                           dst,
5650                                           src,
5651                                           fn64,
5652                                           inactive_value);
5653   }
5654 }
5655 
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5656 LogicVRegister Simulator::faddv(VectorFormat vform,
5657                                 LogicVRegister dst,
5658                                 const LogicVRegister& src) {
5659   return FPPairedAcrossHelper(vform,
5660                               dst,
5661                               src,
5662                               &Simulator::FPAdd<SimFloat16>,
5663                               &Simulator::FPAdd<float>,
5664                               &Simulator::FPAdd<double>,
5665                               0);
5666 }
5667 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5668 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5669                                 LogicVRegister dst,
5670                                 const LogicVRegister& src) {
5671   int lane_size = LaneSizeInBitsFromFormat(vform);
5672   uint64_t inactive_value =
5673       FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5674   return FPPairedAcrossHelper(vform,
5675                               dst,
5676                               src,
5677                               &Simulator::FPMax<SimFloat16>,
5678                               &Simulator::FPMax<float>,
5679                               &Simulator::FPMax<double>,
5680                               inactive_value);
5681 }
5682 
5683 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5684 LogicVRegister Simulator::fminv(VectorFormat vform,
5685                                 LogicVRegister dst,
5686                                 const LogicVRegister& src) {
5687   int lane_size = LaneSizeInBitsFromFormat(vform);
5688   uint64_t inactive_value =
5689       FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5690   return FPPairedAcrossHelper(vform,
5691                               dst,
5692                               src,
5693                               &Simulator::FPMin<SimFloat16>,
5694                               &Simulator::FPMin<float>,
5695                               &Simulator::FPMin<double>,
5696                               inactive_value);
5697 }
5698 
5699 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5700 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5701                                   LogicVRegister dst,
5702                                   const LogicVRegister& src) {
5703   int lane_size = LaneSizeInBitsFromFormat(vform);
5704   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5705   return FPPairedAcrossHelper(vform,
5706                               dst,
5707                               src,
5708                               &Simulator::FPMaxNM<SimFloat16>,
5709                               &Simulator::FPMaxNM<float>,
5710                               &Simulator::FPMaxNM<double>,
5711                               inactive_value);
5712 }
5713 
5714 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5715 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5716                                   LogicVRegister dst,
5717                                   const LogicVRegister& src) {
5718   int lane_size = LaneSizeInBitsFromFormat(vform);
5719   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5720   return FPPairedAcrossHelper(vform,
5721                               dst,
5722                               src,
5723                               &Simulator::FPMinNM<SimFloat16>,
5724                               &Simulator::FPMinNM<float>,
5725                               &Simulator::FPMinNM<double>,
5726                               inactive_value);
5727 }
5728 
5729 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5730 LogicVRegister Simulator::fmul(VectorFormat vform,
5731                                LogicVRegister dst,
5732                                const LogicVRegister& src1,
5733                                const LogicVRegister& src2,
5734                                int index) {
5735   dst.ClearForWrite(vform);
5736   SimVRegister temp;
5737   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5738     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5739     fmul<SimFloat16>(vform, dst, src1, index_reg);
5740   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5741     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5742     fmul<float>(vform, dst, src1, index_reg);
5743   } else {
5744     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5745     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5746     fmul<double>(vform, dst, src1, index_reg);
5747   }
5748   return dst;
5749 }
5750 
5751 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5752 LogicVRegister Simulator::fmla(VectorFormat vform,
5753                                LogicVRegister dst,
5754                                const LogicVRegister& src1,
5755                                const LogicVRegister& src2,
5756                                int index) {
5757   dst.ClearForWrite(vform);
5758   SimVRegister temp;
5759   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5760     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5761     fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5762   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5763     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5764     fmla<float>(vform, dst, dst, src1, index_reg);
5765   } else {
5766     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5767     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5768     fmla<double>(vform, dst, dst, src1, index_reg);
5769   }
5770   return dst;
5771 }
5772 
5773 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5774 LogicVRegister Simulator::fmls(VectorFormat vform,
5775                                LogicVRegister dst,
5776                                const LogicVRegister& src1,
5777                                const LogicVRegister& src2,
5778                                int index) {
5779   dst.ClearForWrite(vform);
5780   SimVRegister temp;
5781   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5782     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5783     fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5784   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5785     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5786     fmls<float>(vform, dst, dst, src1, index_reg);
5787   } else {
5788     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5789     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5790     fmls<double>(vform, dst, dst, src1, index_reg);
5791   }
5792   return dst;
5793 }
5794 
5795 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5796 LogicVRegister Simulator::fmulx(VectorFormat vform,
5797                                 LogicVRegister dst,
5798                                 const LogicVRegister& src1,
5799                                 const LogicVRegister& src2,
5800                                 int index) {
5801   dst.ClearForWrite(vform);
5802   SimVRegister temp;
5803   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5804     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5805     fmulx<SimFloat16>(vform, dst, src1, index_reg);
5806   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5807     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5808     fmulx<float>(vform, dst, src1, index_reg);
5809   } else {
5810     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5811     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5812     fmulx<double>(vform, dst, src1, index_reg);
5813   }
5814   return dst;
5815 }
5816 
5817 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5818 LogicVRegister Simulator::frint(VectorFormat vform,
5819                                 LogicVRegister dst,
5820                                 const LogicVRegister& src,
5821                                 FPRounding rounding_mode,
5822                                 bool inexact_exception,
5823                                 FrintMode frint_mode) {
5824   dst.ClearForWrite(vform);
5825   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5826     VIXL_ASSERT(frint_mode == kFrintToInteger);
5827     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5828       SimFloat16 input = src.Float<SimFloat16>(i);
5829       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5830       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5831         FPProcessException();
5832       }
5833       dst.SetFloat<SimFloat16>(i, rounded);
5834     }
5835   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5836     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5837       float input = src.Float<float>(i);
5838       float rounded =
5839           static_cast<float>(FPRoundInt(input, rounding_mode, frint_mode));
5840 
5841       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5842         FPProcessException();
5843       }
5844       dst.SetFloat<float>(i, rounded);
5845     }
5846   } else {
5847     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5848     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5849       double input = src.Float<double>(i);
5850       double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5851       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5852         FPProcessException();
5853       }
5854       dst.SetFloat<double>(i, rounded);
5855     }
5856   }
5857   return dst;
5858 }
5859 
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5860 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5861                                VectorFormat src_vform,
5862                                LogicVRegister dst,
5863                                const LogicPRegister& pg,
5864                                const LogicVRegister& src) {
5865   unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5866   unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5867   VectorFormat vform = SVEFormatFromLaneSizeInBits(
5868       std::max(dst_data_size_in_bits, src_data_size_in_bits));
5869 
5870   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5871     if (!pg.IsActive(vform, i)) continue;
5872 
5873     uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5874                                                       0,
5875                                                       src.Uint(vform, i));
5876     double dst_value =
5877         RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5878 
5879     uint64_t dst_raw_bits =
5880         FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5881 
5882     dst.SetUint(vform, i, dst_raw_bits);
5883   }
5884 
5885   return dst;
5886 }
5887 
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5888 LogicVRegister Simulator::fcvts(VectorFormat vform,
5889                                 unsigned dst_data_size_in_bits,
5890                                 unsigned src_data_size_in_bits,
5891                                 LogicVRegister dst,
5892                                 const LogicPRegister& pg,
5893                                 const LogicVRegister& src,
5894                                 FPRounding round,
5895                                 int fbits) {
5896   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5897   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5898 
5899   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5900     if (!pg.IsActive(vform, i)) continue;
5901 
5902     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5903                                                0,
5904                                                src.Uint(vform, i));
5905     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5906                     std::pow(2.0, fbits);
5907 
5908     switch (dst_data_size_in_bits) {
5909       case kHRegSize:
5910         dst.SetInt(vform, i, FPToInt16(result, round));
5911         break;
5912       case kSRegSize:
5913         dst.SetInt(vform, i, FPToInt32(result, round));
5914         break;
5915       case kDRegSize:
5916         dst.SetInt(vform, i, FPToInt64(result, round));
5917         break;
5918       default:
5919         VIXL_UNIMPLEMENTED();
5920         break;
5921     }
5922   }
5923 
5924   return dst;
5925 }
5926 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5927 LogicVRegister Simulator::fcvts(VectorFormat vform,
5928                                 LogicVRegister dst,
5929                                 const LogicVRegister& src,
5930                                 FPRounding round,
5931                                 int fbits) {
5932   dst.ClearForWrite(vform);
5933   return fcvts(vform,
5934                LaneSizeInBitsFromFormat(vform),
5935                LaneSizeInBitsFromFormat(vform),
5936                dst,
5937                GetPTrue(),
5938                src,
5939                round,
5940                fbits);
5941 }
5942 
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5943 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5944                                 unsigned dst_data_size_in_bits,
5945                                 unsigned src_data_size_in_bits,
5946                                 LogicVRegister dst,
5947                                 const LogicPRegister& pg,
5948                                 const LogicVRegister& src,
5949                                 FPRounding round,
5950                                 int fbits) {
5951   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5952   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5953 
5954   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5955     if (!pg.IsActive(vform, i)) continue;
5956 
5957     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5958                                                0,
5959                                                src.Uint(vform, i));
5960     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5961                     std::pow(2.0, fbits);
5962 
5963     switch (dst_data_size_in_bits) {
5964       case kHRegSize:
5965         dst.SetUint(vform, i, FPToUInt16(result, round));
5966         break;
5967       case kSRegSize:
5968         dst.SetUint(vform, i, FPToUInt32(result, round));
5969         break;
5970       case kDRegSize:
5971         dst.SetUint(vform, i, FPToUInt64(result, round));
5972         break;
5973       default:
5974         VIXL_UNIMPLEMENTED();
5975         break;
5976     }
5977   }
5978 
5979   return dst;
5980 }
5981 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5982 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5983                                 LogicVRegister dst,
5984                                 const LogicVRegister& src,
5985                                 FPRounding round,
5986                                 int fbits) {
5987   dst.ClearForWrite(vform);
5988   return fcvtu(vform,
5989                LaneSizeInBitsFromFormat(vform),
5990                LaneSizeInBitsFromFormat(vform),
5991                dst,
5992                GetPTrue(),
5993                src,
5994                round,
5995                fbits);
5996 }
5997 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5998 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5999                                 LogicVRegister dst,
6000                                 const LogicVRegister& src) {
6001   dst.ClearForWrite(vform);
6002   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6003     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
6004       // TODO: Full support for SimFloat16 in SimRegister(s).
6005       dst.SetFloat(i,
6006                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
6007                              ReadDN()));
6008     }
6009   } else {
6010     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6011     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
6012       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
6013     }
6014   }
6015   return dst;
6016 }
6017 
6018 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6019 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
6020                                  LogicVRegister dst,
6021                                  const LogicVRegister& src) {
6022   dst.ClearForWrite(vform);
6023   int lane_count = LaneCountFromFormat(vform);
6024   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6025     for (int i = 0; i < lane_count; i++) {
6026       // TODO: Full support for SimFloat16 in SimRegister(s).
6027       dst.SetFloat(i,
6028                    FPToFloat(RawbitsToFloat16(
6029                                  src.Float<uint16_t>(i + lane_count)),
6030                              ReadDN()));
6031     }
6032   } else {
6033     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6034     for (int i = 0; i < lane_count; i++) {
6035       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
6036     }
6037   }
6038   return dst;
6039 }
6040 
6041 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6042 LogicVRegister Simulator::fcvtn(VectorFormat vform,
6043                                 LogicVRegister dst,
6044                                 const LogicVRegister& src) {
6045   SimVRegister tmp;
6046   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6047   dst.ClearForWrite(vform);
6048   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6049     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6050       dst.SetFloat(i,
6051                    Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6052                                                 FPTieEven,
6053                                                 ReadDN())));
6054     }
6055   } else {
6056     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6057     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6058       dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6059     }
6060   }
6061   return dst;
6062 }
6063 
6064 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6065 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6066                                  LogicVRegister dst,
6067                                  const LogicVRegister& src) {
6068   dst.ClearForWrite(vform);
6069   int lane_count = LaneCountFromFormat(vform) / 2;
6070   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6071     for (int i = lane_count - 1; i >= 0; i--) {
6072       dst.SetFloat(i + lane_count,
6073                    Float16ToRawbits(
6074                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6075     }
6076   } else {
6077     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6078     for (int i = lane_count - 1; i >= 0; i--) {
6079       dst.SetFloat(i + lane_count,
6080                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6081     }
6082   }
6083   return dst;
6084 }
6085 
6086 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6087 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6088                                  LogicVRegister dst,
6089                                  const LogicVRegister& src) {
6090   SimVRegister tmp;
6091   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6092   int input_lane_count = LaneCountFromFormat(vform);
6093   if (IsSVEFormat(vform)) {
6094     mov(kFormatVnB, tmp, src);
6095     input_lane_count /= 2;
6096   }
6097 
6098   dst.ClearForWrite(vform);
6099   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6100 
6101   for (int i = 0; i < input_lane_count; i++) {
6102     dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6103   }
6104   return dst;
6105 }
6106 
6107 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6108 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6109                                   LogicVRegister dst,
6110                                   const LogicVRegister& src) {
6111   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6112   dst.ClearForWrite(vform);
6113   int lane_count = LaneCountFromFormat(vform) / 2;
6114   for (int i = lane_count - 1; i >= 0; i--) {
6115     dst.SetFloat(i + lane_count,
6116                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6117   }
6118   return dst;
6119 }
6120 
6121 
6122 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6123 double Simulator::recip_sqrt_estimate(double a) {
6124   int quot0, quot1, s;
6125   double r;
6126   if (a < 0.5) {
6127     quot0 = static_cast<int>(a * 512.0);
6128     r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6129   } else {
6130     quot1 = static_cast<int>(a * 256.0);
6131     r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6132   }
6133   s = static_cast<int>(256.0 * r + 0.5);
6134   return static_cast<double>(s) / 256.0;
6135 }
6136 
6137 
Bits(uint64_t val,int start_bit,int end_bit)6138 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6139   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6140 }
6141 
6142 
6143 template <typename T>
FPRecipSqrtEstimate(T op)6144 T Simulator::FPRecipSqrtEstimate(T op) {
6145   if (IsNaN(op)) {
6146     return FPProcessNaN(op);
6147   } else if (op == 0.0) {
6148     if (copysign(1.0, op) < 0.0) {
6149       return T(kFP64NegativeInfinity);
6150     } else {
6151       return T(kFP64PositiveInfinity);
6152     }
6153   } else if (copysign(1.0, op) < 0.0) {
6154     FPProcessException();
6155     return FPDefaultNaN<T>();
6156   } else if (IsInf(op)) {
6157     return 0.0;
6158   } else {
6159     uint64_t fraction;
6160     int exp, result_exp;
6161 
6162     if constexpr (IsFloat16<T>()) {
6163       exp = Float16Exp(op);
6164       fraction = Float16Mantissa(op);
6165       fraction <<= 42;
6166     } else if constexpr (IsFloat32<T>()) {
6167       exp = FloatExp(op);
6168       fraction = FloatMantissa(op);
6169       fraction <<= 29;
6170     } else {
6171       VIXL_ASSERT(IsFloat64<T>());
6172       exp = DoubleExp(op);
6173       fraction = DoubleMantissa(op);
6174     }
6175 
6176     if (exp == 0) {
6177       while (Bits(fraction, 51, 51) == 0) {
6178         fraction = Bits(fraction, 50, 0) << 1;
6179         exp -= 1;
6180       }
6181       fraction = Bits(fraction, 50, 0) << 1;
6182     }
6183 
6184     double scaled;
6185     if (Bits(exp, 0, 0) == 0) {
6186       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6187     } else {
6188       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6189     }
6190 
6191     if constexpr (IsFloat16<T>()) {
6192       result_exp = (44 - exp) / 2;
6193     } else if constexpr (IsFloat32<T>()) {
6194       result_exp = (380 - exp) / 2;
6195     } else {
6196       VIXL_ASSERT(IsFloat64<T>());
6197       result_exp = (3068 - exp) / 2;
6198     }
6199 
6200     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6201 
6202     if constexpr (IsFloat16<T>()) {
6203       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6204       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6205       return Float16Pack(0, exp_bits, est_bits);
6206     } else if constexpr (IsFloat32<T>()) {
6207       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6208       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6209       return FloatPack(0, exp_bits, est_bits);
6210     } else {
6211       VIXL_ASSERT(IsFloat64<T>());
6212       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6213     }
6214   }
6215 }
6216 
6217 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6218 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6219                                   LogicVRegister dst,
6220                                   const LogicVRegister& src) {
6221   dst.ClearForWrite(vform);
6222   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6223     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6224       SimFloat16 input = src.Float<SimFloat16>(i);
6225       dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6226     }
6227   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6228     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6229       float input = src.Float<float>(i);
6230       dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6231     }
6232   } else {
6233     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6234     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6235       double input = src.Float<double>(i);
6236       dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6237     }
6238   }
6239   return dst;
6240 }
6241 
6242 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6243 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6244   uint32_t sign;
6245 
6246   if constexpr (IsFloat16<T>()) {
6247     sign = Float16Sign(op);
6248   } else if constexpr (IsFloat32<T>()) {
6249     sign = FloatSign(op);
6250   } else {
6251     VIXL_ASSERT(IsFloat64<T>());
6252     sign = DoubleSign(op);
6253   }
6254 
6255   if (IsNaN(op)) {
6256     return FPProcessNaN(op);
6257   } else if (IsInf(op)) {
6258     return (sign == 1) ? T(-0.0) : T(0.0);
6259   } else if (op == 0.0) {
6260     FPProcessException();  // FPExc_DivideByZero exception.
6261     return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
6262   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6263              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6264              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6265     bool overflow_to_inf = false;
6266     switch (rounding) {
6267       case FPTieEven:
6268         overflow_to_inf = true;
6269         break;
6270       case FPPositiveInfinity:
6271         overflow_to_inf = (sign == 0);
6272         break;
6273       case FPNegativeInfinity:
6274         overflow_to_inf = (sign == 1);
6275         break;
6276       case FPZero:
6277         overflow_to_inf = false;
6278         break;
6279       default:
6280         break;
6281     }
6282     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6283     if (overflow_to_inf) {
6284       return (sign == 1) ? T(kFP64NegativeInfinity) : T(kFP64PositiveInfinity);
6285     } else {
6286       // Return FPMaxNormal(sign).
6287       if constexpr (IsFloat16<T>()) {
6288         return Float16Pack(sign, 0x1f, 0x3ff);
6289       } else if constexpr (IsFloat32<T>()) {
6290         return FloatPack(sign, 0xfe, 0x07fffff);
6291       } else {
6292         VIXL_ASSERT(IsFloat64<T>());
6293         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6294       }
6295     }
6296   } else {
6297     uint64_t fraction;
6298     int exp, result_exp;
6299 
6300     if constexpr (IsFloat16<T>()) {
6301       sign = Float16Sign(op);
6302       exp = Float16Exp(op);
6303       fraction = Float16Mantissa(op);
6304       fraction <<= 42;
6305     } else if constexpr (IsFloat32<T>()) {
6306       sign = FloatSign(op);
6307       exp = FloatExp(op);
6308       fraction = FloatMantissa(op);
6309       fraction <<= 29;
6310     } else {
6311       VIXL_ASSERT(IsFloat64<T>());
6312       sign = DoubleSign(op);
6313       exp = DoubleExp(op);
6314       fraction = DoubleMantissa(op);
6315     }
6316 
6317     if (exp == 0) {
6318       if (Bits(fraction, 51, 51) == 0) {
6319         exp -= 1;
6320         fraction = Bits(fraction, 49, 0) << 2;
6321       } else {
6322         fraction = Bits(fraction, 50, 0) << 1;
6323       }
6324     }
6325 
6326     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6327 
6328     if constexpr (IsFloat16<T>()) {
6329       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6330     } else if constexpr (IsFloat32<T>()) {
6331       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6332     } else {
6333       VIXL_ASSERT(IsFloat64<T>());
6334       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6335     }
6336 
6337     double estimate = recip_estimate(scaled);
6338 
6339     fraction = DoubleMantissa(estimate);
6340     if (result_exp == 0) {
6341       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6342     } else if (result_exp == -1) {
6343       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6344       result_exp = 0;
6345     }
6346     if constexpr (IsFloat16<T>()) {
6347       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6348       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6349       return Float16Pack(sign, exp_bits, frac_bits);
6350     } else if constexpr (IsFloat32<T>()) {
6351       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6352       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6353       return FloatPack(sign, exp_bits, frac_bits);
6354     } else {
6355       VIXL_ASSERT(IsFloat64<T>());
6356       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6357     }
6358   }
6359 }
6360 
6361 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6362 LogicVRegister Simulator::frecpe(VectorFormat vform,
6363                                  LogicVRegister dst,
6364                                  const LogicVRegister& src,
6365                                  FPRounding round) {
6366   dst.ClearForWrite(vform);
6367   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6368     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6369       SimFloat16 input = src.Float<SimFloat16>(i);
6370       dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6371     }
6372   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6373     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6374       float input = src.Float<float>(i);
6375       dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6376     }
6377   } else {
6378     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6379     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6380       double input = src.Float<double>(i);
6381       dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6382     }
6383   }
6384   return dst;
6385 }
6386 
6387 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6388 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6389                                   LogicVRegister dst,
6390                                   const LogicVRegister& src) {
6391   dst.ClearForWrite(vform);
6392   uint64_t operand;
6393   uint32_t result;
6394   double dp_operand, dp_result;
6395   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6396     operand = src.Uint(vform, i);
6397     if (operand <= 0x3FFFFFFF) {
6398       result = 0xFFFFFFFF;
6399     } else {
6400       dp_operand = operand * std::pow(2.0, -32);
6401       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6402       result = static_cast<uint32_t>(dp_result);
6403     }
6404     dst.SetUint(vform, i, result);
6405   }
6406   return dst;
6407 }
6408 
6409 
6410 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6411 double Simulator::recip_estimate(double a) {
6412   int q, s;
6413   double r;
6414   q = static_cast<int>(a * 512.0);
6415   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6416   s = static_cast<int>(256.0 * r + 0.5);
6417   return static_cast<double>(s) / 256.0;
6418 }
6419 
6420 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6421 LogicVRegister Simulator::urecpe(VectorFormat vform,
6422                                  LogicVRegister dst,
6423                                  const LogicVRegister& src) {
6424   dst.ClearForWrite(vform);
6425   uint64_t operand;
6426   uint32_t result;
6427   double dp_operand, dp_result;
6428   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6429     operand = src.Uint(vform, i);
6430     if (operand <= 0x7FFFFFFF) {
6431       result = 0xFFFFFFFF;
6432     } else {
6433       dp_operand = operand * std::pow(2.0, -32);
6434       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6435       result = static_cast<uint32_t>(dp_result);
6436     }
6437     dst.SetUint(vform, i, result);
6438   }
6439   return dst;
6440 }
6441 
pfalse(LogicPRegister dst)6442 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6443   dst.Clear();
6444   return dst;
6445 }
6446 
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6447 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6448                                  const LogicPRegister& pg,
6449                                  const LogicPRegister& src) {
6450   int first_pg = GetFirstActive(kFormatVnB, pg);
6451   VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6452   mov(dst, src);
6453   if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6454   return dst;
6455 }
6456 
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6457 LogicPRegister Simulator::ptrue(VectorFormat vform,
6458                                 LogicPRegister dst,
6459                                 int pattern) {
6460   int count = GetPredicateConstraintLaneCount(vform, pattern);
6461   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6462     dst.SetActive(vform, i, i < count);
6463   }
6464   return dst;
6465 }
6466 
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6467 LogicPRegister Simulator::pnext(VectorFormat vform,
6468                                 LogicPRegister dst,
6469                                 const LogicPRegister& pg,
6470                                 const LogicPRegister& src) {
6471   int next = GetLastActive(vform, src) + 1;
6472   while (next < LaneCountFromFormat(vform)) {
6473     if (pg.IsActive(vform, next)) break;
6474     next++;
6475   }
6476 
6477   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6478     dst.SetActive(vform, i, (i == next));
6479   }
6480   return dst;
6481 }
6482 
6483 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6484 LogicVRegister Simulator::frecpx(VectorFormat vform,
6485                                  LogicVRegister dst,
6486                                  const LogicVRegister& src) {
6487   dst.ClearForWrite(vform);
6488   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6489     T op = src.Float<T>(i);
6490     T result;
6491     if (IsNaN(op)) {
6492       result = FPProcessNaN(op);
6493     } else {
6494       int exp;
6495       uint32_t sign;
6496       if constexpr (IsFloat16<T>()) {
6497         sign = Float16Sign(op);
6498         exp = Float16Exp(op);
6499         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6500         result = Float16Pack(sign, exp, 0);
6501       } else if constexpr (IsFloat32<T>()) {
6502         sign = FloatSign(op);
6503         exp = FloatExp(op);
6504         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6505         result = FloatPack(sign, exp, 0);
6506       } else {
6507         VIXL_ASSERT(IsFloat64<T>());
6508         sign = DoubleSign(op);
6509         exp = DoubleExp(op);
6510         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6511         result = DoublePack(sign, exp, 0);
6512       }
6513     }
6514     dst.SetFloat(i, result);
6515   }
6516   return dst;
6517 }
6518 
6519 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6520 LogicVRegister Simulator::frecpx(VectorFormat vform,
6521                                  LogicVRegister dst,
6522                                  const LogicVRegister& src) {
6523   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6524     frecpx<SimFloat16>(vform, dst, src);
6525   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6526     frecpx<float>(vform, dst, src);
6527   } else {
6528     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6529     frecpx<double>(vform, dst, src);
6530   }
6531   return dst;
6532 }
6533 
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6534 LogicVRegister Simulator::flogb(VectorFormat vform,
6535                                 LogicVRegister dst,
6536                                 const LogicVRegister& src) {
6537   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6538     double op = 0.0;
6539     switch (vform) {
6540       case kFormatVnH:
6541         op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6542         break;
6543       case kFormatVnS:
6544         op = src.Float<float>(i);
6545         break;
6546       case kFormatVnD:
6547         op = src.Float<double>(i);
6548         break;
6549       default:
6550         VIXL_UNREACHABLE();
6551     }
6552 
6553     switch (std::fpclassify(op)) {
6554       case FP_INFINITE:
6555         dst.SetInt(vform, i, MaxIntFromFormat(vform));
6556         break;
6557       case FP_NAN:
6558       case FP_ZERO:
6559         dst.SetInt(vform, i, MinIntFromFormat(vform));
6560         break;
6561       case FP_SUBNORMAL: {
6562         // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6563         // bits where the sign and exponent would be. We subtract 12 to
6564         // find the number of leading zero bits in the mantissa itself.
6565         int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6566         // Log2 of a subnormal is the lowest exponent a normal number can
6567         // represent, together with the zeros in the mantissa.
6568         dst.SetInt(vform, i, -1023 - mant_zero_count);
6569         break;
6570       }
6571       case FP_NORMAL:
6572         // Log2 of a normal number is the exponent minus the bias.
6573         dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6574         break;
6575     }
6576   }
6577   return dst;
6578 }
6579 
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6580 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6581                                  LogicVRegister dst,
6582                                  const LogicVRegister& src1,
6583                                  const LogicVRegister& src2) {
6584   SimVRegister maybe_neg_src1;
6585 
6586   // The bottom bit of src2 controls the sign of the result. Use it to
6587   // conditionally invert the sign of one `fmul` operand.
6588   shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6589   eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6590 
6591   // Multiply src1 by the modified neg_src1, which is potentially its negation.
6592   // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6593   // rather than neg_src1, must be the first source argument.
6594   fmul(vform, dst, src1, maybe_neg_src1);
6595 
6596   return dst;
6597 }
6598 
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6599 LogicVRegister Simulator::ftssel(VectorFormat vform,
6600                                  LogicVRegister dst,
6601                                  const LogicVRegister& src1,
6602                                  const LogicVRegister& src2) {
6603   unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6604   uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6605   uint64_t one;
6606 
6607   if (lane_bits == kHRegSize) {
6608     one = Float16ToRawbits(Float16(1.0));
6609   } else if (lane_bits == kSRegSize) {
6610     one = FloatToRawbits(1.0);
6611   } else {
6612     VIXL_ASSERT(lane_bits == kDRegSize);
6613     one = DoubleToRawbits(1.0);
6614   }
6615 
6616   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6617     // Use integer accessors for this operation, as this is a data manipulation
6618     // task requiring no calculation.
6619     uint64_t op = src1.Uint(vform, i);
6620 
6621     // Only the bottom two bits of the src2 register are significant, indicating
6622     // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6623     // determines the sign of the value written to dst.
6624     uint64_t q = src2.Uint(vform, i);
6625     if ((q & 1) == 1) op = one;
6626     if ((q & 2) == 2) op ^= sign_bit;
6627 
6628     dst.SetUint(vform, i, op);
6629   }
6630 
6631   return dst;
6632 }
6633 
6634 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6635 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6636                                        LogicVRegister dst,
6637                                        const LogicVRegister& src1,
6638                                        const LogicVRegister& src2,
6639                                        uint64_t coeff_pos,
6640                                        uint64_t coeff_neg) {
6641   SimVRegister zero;
6642   dup_immediate(kFormatVnB, zero, 0);
6643 
6644   SimVRegister cf;
6645   SimVRegister cfn;
6646   dup_immediate(vform, cf, coeff_pos);
6647   dup_immediate(vform, cfn, coeff_neg);
6648 
6649   // The specification requires testing the top bit of the raw value, rather
6650   // than the sign of the floating point number, so use an integer comparison
6651   // here.
6652   SimPRegister is_neg;
6653   SVEIntCompareVectorsHelper(lt,
6654                              vform,
6655                              is_neg,
6656                              GetPTrue(),
6657                              src2,
6658                              zero,
6659                              false,
6660                              LeaveFlags);
6661   mov_merging(vform, cf, is_neg, cfn);
6662 
6663   SimVRegister temp;
6664   fabs_<T>(vform, temp, src2);
6665   fmla<T>(vform, cf, cf, src1, temp);
6666   mov(vform, dst, cf);
6667   return dst;
6668 }
6669 
6670 
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6671 LogicVRegister Simulator::ftmad(VectorFormat vform,
6672                                 LogicVRegister dst,
6673                                 const LogicVRegister& src1,
6674                                 const LogicVRegister& src2,
6675                                 unsigned index) {
6676   static const uint64_t ftmad_coeff16[] = {0x3c00,
6677                                            0xb155,
6678                                            0x2030,
6679                                            0x0000,
6680                                            0x0000,
6681                                            0x0000,
6682                                            0x0000,
6683                                            0x0000,
6684                                            0x3c00,
6685                                            0xb800,
6686                                            0x293a,
6687                                            0x0000,
6688                                            0x0000,
6689                                            0x0000,
6690                                            0x0000,
6691                                            0x0000};
6692 
6693   static const uint64_t ftmad_coeff32[] = {0x3f800000,
6694                                            0xbe2aaaab,
6695                                            0x3c088886,
6696                                            0xb95008b9,
6697                                            0x36369d6d,
6698                                            0x00000000,
6699                                            0x00000000,
6700                                            0x00000000,
6701                                            0x3f800000,
6702                                            0xbf000000,
6703                                            0x3d2aaaa6,
6704                                            0xbab60705,
6705                                            0x37cd37cc,
6706                                            0x00000000,
6707                                            0x00000000,
6708                                            0x00000000};
6709 
6710   static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6711                                            0xbfc5555555555543,
6712                                            0x3f8111111110f30c,
6713                                            0xbf2a01a019b92fc6,
6714                                            0x3ec71de351f3d22b,
6715                                            0xbe5ae5e2b60f7b91,
6716                                            0x3de5d8408868552f,
6717                                            0x0000000000000000,
6718                                            0x3ff0000000000000,
6719                                            0xbfe0000000000000,
6720                                            0x3fa5555555555536,
6721                                            0xbf56c16c16c13a0b,
6722                                            0x3efa01a019b1e8d8,
6723                                            0xbe927e4f7282f468,
6724                                            0x3e21ee96d2641b13,
6725                                            0xbda8f76380fbb401};
6726   VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6727   VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6728   VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6729 
6730   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6731     FTMaddHelper<SimFloat16>(vform,
6732                              dst,
6733                              src1,
6734                              src2,
6735                              ftmad_coeff16[index],
6736                              ftmad_coeff16[index + 8]);
6737   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6738     FTMaddHelper<float>(vform,
6739                         dst,
6740                         src1,
6741                         src2,
6742                         ftmad_coeff32[index],
6743                         ftmad_coeff32[index + 8]);
6744   } else {
6745     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6746     FTMaddHelper<double>(vform,
6747                          dst,
6748                          src1,
6749                          src2,
6750                          ftmad_coeff64[index],
6751                          ftmad_coeff64[index + 8]);
6752   }
6753   return dst;
6754 }
6755 
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6756 LogicVRegister Simulator::fexpa(VectorFormat vform,
6757                                 LogicVRegister dst,
6758                                 const LogicVRegister& src) {
6759   static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6760                                            0x005d, 0x0075, 0x008e, 0x00a8,
6761                                            0x00c2, 0x00dc, 0x00f8, 0x0114,
6762                                            0x0130, 0x014d, 0x016b, 0x0189,
6763                                            0x01a8, 0x01c8, 0x01e8, 0x0209,
6764                                            0x022b, 0x024e, 0x0271, 0x0295,
6765                                            0x02ba, 0x02e0, 0x0306, 0x032e,
6766                                            0x0356, 0x037f, 0x03a9, 0x03d4};
6767 
6768   static const uint64_t fexpa_coeff32[] =
6769       {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6770        0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6771        0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6772        0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6773        0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6774        0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6775        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6776        0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6777        0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6778        0x7d3e0c};
6779 
6780   static const uint64_t fexpa_coeff64[] =
6781       {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6782        0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6783        0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6784        0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6785        0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6786        0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6787        0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6788        0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6789        0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6790        0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6791        0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6792        0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6793        0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6794        0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6795        0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6796        0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6797 
6798   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6799   int index_highbit = 5;
6800   int op_highbit, op_shift;
6801   const uint64_t* fexpa_coeff;
6802 
6803   if (lane_size == kHRegSize) {
6804     index_highbit = 4;
6805     VIXL_ASSERT(ArrayLength(fexpa_coeff16) ==
6806                 (uint64_t{1} << (index_highbit + 1)));
6807     fexpa_coeff = fexpa_coeff16;
6808     op_highbit = 9;
6809     op_shift = 10;
6810   } else if (lane_size == kSRegSize) {
6811     VIXL_ASSERT(ArrayLength(fexpa_coeff32) ==
6812                 (uint64_t{1} << (index_highbit + 1)));
6813     fexpa_coeff = fexpa_coeff32;
6814     op_highbit = 13;
6815     op_shift = 23;
6816   } else {
6817     VIXL_ASSERT(lane_size == kDRegSize);
6818     VIXL_ASSERT(ArrayLength(fexpa_coeff64) ==
6819                 (uint64_t{1} << (index_highbit + 1)));
6820     fexpa_coeff = fexpa_coeff64;
6821     op_highbit = 16;
6822     op_shift = 52;
6823   }
6824 
6825   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6826     uint64_t op = src.Uint(vform, i);
6827     uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6828     result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6829     dst.SetUint(vform, i, result);
6830   }
6831   return dst;
6832 }
6833 
6834 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6835 LogicVRegister Simulator::fscale(VectorFormat vform,
6836                                  LogicVRegister dst,
6837                                  const LogicVRegister& src1,
6838                                  const LogicVRegister& src2) {
6839   T two = T(2.0);
6840   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6841     T src1_val = src1.Float<T>(i);
6842     if (!IsNaN(src1_val)) {
6843       int64_t scale = src2.Int(vform, i);
6844       // TODO: this is a low-performance implementation, but it's simple and
6845       // less likely to be buggy. Consider replacing it with something faster.
6846 
6847       // Scales outside of these bounds become infinity or zero, so there's no
6848       // point iterating further.
6849       scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6850 
6851       // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6852       // decrement scale until it's zero.
6853       while (scale-- > 0) {
6854         src1_val = FPMul(src1_val, two);
6855       }
6856 
6857       // If scale is negative, divide by two and increment scale until it's
6858       // zero. Initially, scale is (src2 - 1), so we pre-increment.
6859       while (++scale < 0) {
6860         src1_val = FPDiv(src1_val, two);
6861       }
6862     }
6863     dst.SetFloat<T>(i, src1_val);
6864   }
6865   return dst;
6866 }
6867 
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6868 LogicVRegister Simulator::fscale(VectorFormat vform,
6869                                  LogicVRegister dst,
6870                                  const LogicVRegister& src1,
6871                                  const LogicVRegister& src2) {
6872   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6873     fscale<SimFloat16>(vform, dst, src1, src2);
6874   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6875     fscale<float>(vform, dst, src1, src2);
6876   } else {
6877     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6878     fscale<double>(vform, dst, src1, src2);
6879   }
6880   return dst;
6881 }
6882 
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6883 LogicVRegister Simulator::scvtf(VectorFormat vform,
6884                                 unsigned dst_data_size_in_bits,
6885                                 unsigned src_data_size_in_bits,
6886                                 LogicVRegister dst,
6887                                 const LogicPRegister& pg,
6888                                 const LogicVRegister& src,
6889                                 FPRounding round,
6890                                 int fbits) {
6891   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6892   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6893   dst.ClearForWrite(vform);
6894 
6895   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6896     if (!pg.IsActive(vform, i)) continue;
6897 
6898     int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6899                                             0,
6900                                             src.Uint(vform, i));
6901 
6902     switch (dst_data_size_in_bits) {
6903       case kHRegSize: {
6904         SimFloat16 result = FixedToFloat16(value, fbits, round);
6905         dst.SetUint(vform, i, Float16ToRawbits(result));
6906         break;
6907       }
6908       case kSRegSize: {
6909         float result = FixedToFloat(value, fbits, round);
6910         dst.SetUint(vform, i, FloatToRawbits(result));
6911         break;
6912       }
6913       case kDRegSize: {
6914         double result = FixedToDouble(value, fbits, round);
6915         dst.SetUint(vform, i, DoubleToRawbits(result));
6916         break;
6917       }
6918       default:
6919         VIXL_UNIMPLEMENTED();
6920         break;
6921     }
6922   }
6923 
6924   return dst;
6925 }
6926 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6927 LogicVRegister Simulator::scvtf(VectorFormat vform,
6928                                 LogicVRegister dst,
6929                                 const LogicVRegister& src,
6930                                 int fbits,
6931                                 FPRounding round) {
6932   return scvtf(vform,
6933                LaneSizeInBitsFromFormat(vform),
6934                LaneSizeInBitsFromFormat(vform),
6935                dst,
6936                GetPTrue(),
6937                src,
6938                round,
6939                fbits);
6940 }
6941 
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6942 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6943                                 unsigned dst_data_size_in_bits,
6944                                 unsigned src_data_size_in_bits,
6945                                 LogicVRegister dst,
6946                                 const LogicPRegister& pg,
6947                                 const LogicVRegister& src,
6948                                 FPRounding round,
6949                                 int fbits) {
6950   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6951   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6952   dst.ClearForWrite(vform);
6953 
6954   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6955     if (!pg.IsActive(vform, i)) continue;
6956 
6957     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6958                                                0,
6959                                                src.Uint(vform, i));
6960 
6961     switch (dst_data_size_in_bits) {
6962       case kHRegSize: {
6963         SimFloat16 result = UFixedToFloat16(value, fbits, round);
6964         dst.SetUint(vform, i, Float16ToRawbits(result));
6965         break;
6966       }
6967       case kSRegSize: {
6968         float result = UFixedToFloat(value, fbits, round);
6969         dst.SetUint(vform, i, FloatToRawbits(result));
6970         break;
6971       }
6972       case kDRegSize: {
6973         double result = UFixedToDouble(value, fbits, round);
6974         dst.SetUint(vform, i, DoubleToRawbits(result));
6975         break;
6976       }
6977       default:
6978         VIXL_UNIMPLEMENTED();
6979         break;
6980     }
6981   }
6982 
6983   return dst;
6984 }
6985 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6986 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6987                                 LogicVRegister dst,
6988                                 const LogicVRegister& src,
6989                                 int fbits,
6990                                 FPRounding round) {
6991   return ucvtf(vform,
6992                LaneSizeInBitsFromFormat(vform),
6993                LaneSizeInBitsFromFormat(vform),
6994                dst,
6995                GetPTrue(),
6996                src,
6997                round,
6998                fbits);
6999 }
7000 
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)7001 LogicVRegister Simulator::unpk(VectorFormat vform,
7002                                LogicVRegister dst,
7003                                const LogicVRegister& src,
7004                                UnpackType unpack_type,
7005                                ExtendType extend_type) {
7006   VectorFormat vform_half = VectorFormatHalfWidth(vform);
7007   const int lane_count = LaneCountFromFormat(vform);
7008   const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
7009 
7010   switch (extend_type) {
7011     case kSignedExtend: {
7012       int64_t result[kZRegMaxSizeInBytes];
7013       for (int i = 0; i < lane_count; ++i) {
7014         result[i] = src.Int(vform_half, i + src_start_lane);
7015       }
7016       for (int i = 0; i < lane_count; ++i) {
7017         dst.SetInt(vform, i, result[i]);
7018       }
7019       break;
7020     }
7021     case kUnsignedExtend: {
7022       uint64_t result[kZRegMaxSizeInBytes];
7023       for (int i = 0; i < lane_count; ++i) {
7024         result[i] = src.Uint(vform_half, i + src_start_lane);
7025       }
7026       for (int i = 0; i < lane_count; ++i) {
7027         dst.SetUint(vform, i, result[i]);
7028       }
7029       break;
7030     }
7031     default:
7032       VIXL_UNREACHABLE();
7033   }
7034   return dst;
7035 }
7036 
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)7037 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
7038                                                      VectorFormat vform,
7039                                                      LogicPRegister dst,
7040                                                      const LogicPRegister& mask,
7041                                                      const LogicVRegister& src1,
7042                                                      const LogicVRegister& src2,
7043                                                      bool is_wide_elements,
7044                                                      FlagsUpdate flags) {
7045   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7046     bool result = false;
7047     if (mask.IsActive(vform, lane)) {
7048       int64_t op1 = 0xbadbeef;
7049       int64_t op2 = 0xbadbeef;
7050       int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
7051       switch (cond) {
7052         case eq:
7053         case ge:
7054         case gt:
7055         case lt:
7056         case le:
7057         case ne:
7058           op1 = src1.Int(vform, lane);
7059           op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7060                                  : src2.Int(vform, lane);
7061           break;
7062         case hi:
7063         case hs:
7064         case ls:
7065         case lo:
7066           op1 = src1.Uint(vform, lane);
7067           op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7068                                  : src2.Uint(vform, lane);
7069           break;
7070         default:
7071           VIXL_UNREACHABLE();
7072       }
7073 
7074       switch (cond) {
7075         case eq:
7076           result = (op1 == op2);
7077           break;
7078         case ne:
7079           result = (op1 != op2);
7080           break;
7081         case ge:
7082           result = (op1 >= op2);
7083           break;
7084         case gt:
7085           result = (op1 > op2);
7086           break;
7087         case le:
7088           result = (op1 <= op2);
7089           break;
7090         case lt:
7091           result = (op1 < op2);
7092           break;
7093         case hs:
7094           result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7095           break;
7096         case hi:
7097           result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7098           break;
7099         case ls:
7100           result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7101           break;
7102         case lo:
7103           result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7104           break;
7105         default:
7106           VIXL_UNREACHABLE();
7107       }
7108     }
7109     dst.SetActive(vform, lane, result);
7110   }
7111 
7112   if (flags == SetFlags) PredTest(vform, mask, dst);
7113 
7114   return dst;
7115 }
7116 
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7117 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7118                                                 VectorFormat vform,
7119                                                 LogicVRegister dst,
7120                                                 const LogicVRegister& src1,
7121                                                 const LogicVRegister& src2,
7122                                                 bool is_wide_elements) {
7123   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7124   VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7125 
7126   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7127     int shift_src_lane = lane;
7128     if (is_wide_elements) {
7129       // If the shift amount comes from wide elements, select the D-sized lane
7130       // which occupies the corresponding lanes of the value to be shifted.
7131       shift_src_lane = (lane * lane_size) / kDRegSize;
7132     }
7133     uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7134 
7135     // Saturate shift_amount to the size of the lane that will be shifted.
7136     if (shift_amount > lane_size) shift_amount = lane_size;
7137 
7138     uint64_t value = src1.Uint(vform, lane);
7139     int64_t result = ShiftOperand(lane_size,
7140                                   value,
7141                                   shift_op,
7142                                   static_cast<unsigned>(shift_amount));
7143     dst.SetUint(vform, lane, result);
7144   }
7145 
7146   return dst;
7147 }
7148 
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7149 LogicVRegister Simulator::asrd(VectorFormat vform,
7150                                LogicVRegister dst,
7151                                const LogicVRegister& src1,
7152                                int shift) {
7153   VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7154                               LaneSizeInBitsFromFormat(vform)));
7155 
7156   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7157     int64_t value = src1.Int(vform, i);
7158     if (shift <= 63) {
7159       if (value < 0) {
7160         // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7161         // cast to int64_t, and cannot cause signed overflow in the result.
7162         value = value + GetUintMask(shift);
7163       }
7164       value = ShiftOperand(kDRegSize, value, ASR, shift);
7165     } else {
7166       value = 0;
7167     }
7168     dst.SetInt(vform, i, value);
7169   }
7170   return dst;
7171 }
7172 
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7173 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7174     LogicalOp logical_op,
7175     VectorFormat vform,
7176     LogicVRegister zd,
7177     const LogicVRegister& zn,
7178     const LogicVRegister& zm) {
7179   VIXL_ASSERT(IsSVEFormat(vform));
7180   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7181     uint64_t op1 = zn.Uint(vform, i);
7182     uint64_t op2 = zm.Uint(vform, i);
7183     uint64_t result = 0;
7184     switch (logical_op) {
7185       case AND:
7186         result = op1 & op2;
7187         break;
7188       case BIC:
7189         result = op1 & ~op2;
7190         break;
7191       case EOR:
7192         result = op1 ^ op2;
7193         break;
7194       case ORR:
7195         result = op1 | op2;
7196         break;
7197       default:
7198         VIXL_UNIMPLEMENTED();
7199     }
7200     zd.SetUint(vform, i, result);
7201   }
7202 
7203   return zd;
7204 }
7205 
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7206 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7207                                                     LogicPRegister pd,
7208                                                     const LogicPRegister& pn,
7209                                                     const LogicPRegister& pm) {
7210   for (int i = 0; i < pn.GetChunkCount(); i++) {
7211     LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7212     LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7213     LogicPRegister::ChunkType result = 0;
7214     switch (op) {
7215       case ANDS_p_p_pp_z:
7216       case AND_p_p_pp_z:
7217         result = op1 & op2;
7218         break;
7219       case BICS_p_p_pp_z:
7220       case BIC_p_p_pp_z:
7221         result = op1 & ~op2;
7222         break;
7223       case EORS_p_p_pp_z:
7224       case EOR_p_p_pp_z:
7225         result = op1 ^ op2;
7226         break;
7227       case NANDS_p_p_pp_z:
7228       case NAND_p_p_pp_z:
7229         result = ~(op1 & op2);
7230         break;
7231       case NORS_p_p_pp_z:
7232       case NOR_p_p_pp_z:
7233         result = ~(op1 | op2);
7234         break;
7235       case ORNS_p_p_pp_z:
7236       case ORN_p_p_pp_z:
7237         result = op1 | ~op2;
7238         break;
7239       case ORRS_p_p_pp_z:
7240       case ORR_p_p_pp_z:
7241         result = op1 | op2;
7242         break;
7243       default:
7244         VIXL_UNIMPLEMENTED();
7245     }
7246     pd.SetChunk(i, result);
7247   }
7248   return pd;
7249 }
7250 
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7251 LogicVRegister Simulator::SVEBitwiseImmHelper(
7252     SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7253     VectorFormat vform,
7254     LogicVRegister zd,
7255     uint64_t imm) {
7256   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7257     uint64_t op1 = zd.Uint(vform, i);
7258     uint64_t result = 0;
7259     switch (op) {
7260       case AND_z_zi:
7261         result = op1 & imm;
7262         break;
7263       case EOR_z_zi:
7264         result = op1 ^ imm;
7265         break;
7266       case ORR_z_zi:
7267         result = op1 | imm;
7268         break;
7269       default:
7270         VIXL_UNIMPLEMENTED();
7271     }
7272     zd.SetUint(vform, i, result);
7273   }
7274 
7275   return zd;
7276 }
7277 
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7278 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7279                                          const LogicPRegister& pg,
7280                                          unsigned zt_code,
7281                                          const LogicSVEAddressVector& addr) {
7282   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7283 
7284   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7285   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7286   int msize_in_bytes = addr.GetMsizeInBytes();
7287   int reg_count = addr.GetRegCount();
7288 
7289   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7290   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7291 
7292   unsigned zt_codes[4] = {zt_code,
7293                           (zt_code + 1) % kNumberOfZRegisters,
7294                           (zt_code + 2) % kNumberOfZRegisters,
7295                           (zt_code + 3) % kNumberOfZRegisters};
7296 
7297   LogicVRegister zt[4] = {
7298       ReadVRegister(zt_codes[0]),
7299       ReadVRegister(zt_codes[1]),
7300       ReadVRegister(zt_codes[2]),
7301       ReadVRegister(zt_codes[3]),
7302   };
7303 
7304   // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7305   // are ignored, so read the source register using the VectorFormat that
7306   // corresponds with the storage format, and multiply the index accordingly.
7307   VectorFormat unpack_vform =
7308       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7309   int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7310 
7311   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7312     if (!pg.IsActive(vform, i)) continue;
7313 
7314     for (int r = 0; r < reg_count; r++) {
7315       uint64_t element_address = addr.GetElementAddress(i, r);
7316       if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {
7317         return;
7318       }
7319     }
7320   }
7321 
7322   if (ShouldTraceWrites()) {
7323     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7324     if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7325       // Use an FP format where it's likely that we're accessing FP data.
7326       format = GetPrintRegisterFormatTryFP(format);
7327     }
7328     // Stores don't represent a change to the source register's value, so only
7329     // print the relevant part of the value.
7330     format = GetPrintRegPartial(format);
7331 
7332     PrintZStructAccess(zt_code,
7333                        reg_count,
7334                        pg,
7335                        format,
7336                        msize_in_bytes,
7337                        "->",
7338                        addr);
7339   }
7340 }
7341 
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7342 bool Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7343                                         const LogicPRegister& pg,
7344                                         unsigned zt_code,
7345                                         const LogicSVEAddressVector& addr,
7346                                         bool is_signed) {
7347   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7348   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7349   int msize_in_bytes = addr.GetMsizeInBytes();
7350   int reg_count = addr.GetRegCount();
7351 
7352   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7353   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7354   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7355 
7356   unsigned zt_codes[4] = {zt_code,
7357                           (zt_code + 1) % kNumberOfZRegisters,
7358                           (zt_code + 2) % kNumberOfZRegisters,
7359                           (zt_code + 3) % kNumberOfZRegisters};
7360   LogicVRegister zt[4] = {
7361       ReadVRegister(zt_codes[0]),
7362       ReadVRegister(zt_codes[1]),
7363       ReadVRegister(zt_codes[2]),
7364       ReadVRegister(zt_codes[3]),
7365   };
7366 
7367   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7368     for (int r = 0; r < reg_count; r++) {
7369       uint64_t element_address = addr.GetElementAddress(i, r);
7370 
7371       if (!pg.IsActive(vform, i)) {
7372         zt[r].SetUint(vform, i, 0);
7373         continue;
7374       }
7375 
7376       if (is_signed) {
7377         if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7378           return false;
7379         }
7380       } else {
7381         if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {
7382           return false;
7383         }
7384       }
7385     }
7386   }
7387 
7388   if (ShouldTraceVRegs()) {
7389     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7390     if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7391       // Use an FP format where it's likely that we're accessing FP data.
7392       format = GetPrintRegisterFormatTryFP(format);
7393     }
7394     PrintZStructAccess(zt_code,
7395                        reg_count,
7396                        pg,
7397                        format,
7398                        msize_in_bytes,
7399                        "<-",
7400                        addr);
7401   }
7402   return true;
7403 }
7404 
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7405 LogicPRegister Simulator::brka(LogicPRegister pd,
7406                                const LogicPRegister& pg,
7407                                const LogicPRegister& pn) {
7408   bool break_ = false;
7409   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7410     if (pg.IsActive(kFormatVnB, i)) {
7411       pd.SetActive(kFormatVnB, i, !break_);
7412       break_ |= pn.IsActive(kFormatVnB, i);
7413     }
7414   }
7415 
7416   return pd;
7417 }
7418 
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7419 LogicPRegister Simulator::brkb(LogicPRegister pd,
7420                                const LogicPRegister& pg,
7421                                const LogicPRegister& pn) {
7422   bool break_ = false;
7423   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7424     if (pg.IsActive(kFormatVnB, i)) {
7425       break_ |= pn.IsActive(kFormatVnB, i);
7426       pd.SetActive(kFormatVnB, i, !break_);
7427     }
7428   }
7429 
7430   return pd;
7431 }
7432 
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7433 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7434                                const LogicPRegister& pg,
7435                                const LogicPRegister& pn) {
7436   if (!IsLastActive(kFormatVnB, pg, pn)) {
7437     pfalse(pdm);
7438   }
7439   return pdm;
7440 }
7441 
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7442 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7443                                 const LogicPRegister& pg,
7444                                 const LogicPRegister& pn,
7445                                 const LogicPRegister& pm) {
7446   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7447 
7448   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7449     bool active = false;
7450     if (pg.IsActive(kFormatVnB, i)) {
7451       active = last_active;
7452       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7453     }
7454     pd.SetActive(kFormatVnB, i, active);
7455   }
7456 
7457   return pd;
7458 }
7459 
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7460 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7461                                 const LogicPRegister& pg,
7462                                 const LogicPRegister& pn,
7463                                 const LogicPRegister& pm) {
7464   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7465 
7466   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7467     bool active = false;
7468     if (pg.IsActive(kFormatVnB, i)) {
7469       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7470       active = last_active;
7471     }
7472     pd.SetActive(kFormatVnB, i, active);
7473   }
7474 
7475   return pd;
7476 }
7477 
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7478 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7479                                            const LogicPRegister& pg,
7480                                            unsigned zt_code,
7481                                            const LogicSVEAddressVector& addr,
7482                                            SVEFaultTolerantLoadType type,
7483                                            bool is_signed) {
7484   int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7485   int msize_in_bits = addr.GetMsizeInBits();
7486   int msize_in_bytes = addr.GetMsizeInBytes();
7487 
7488   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7489   VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7490   VIXL_ASSERT(addr.GetRegCount() == 1);
7491 
7492   LogicVRegister zt = ReadVRegister(zt_code);
7493   LogicPRegister ffr = ReadFFR();
7494 
7495   // Non-faulting loads are allowed to fail arbitrarily. To stress user
7496   // code, fail a random element in roughly one in eight full-vector loads.
7497   uint32_t rnd = static_cast<uint32_t>(rand_gen_());
7498   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7499 
7500   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7501     uint64_t value = 0;
7502 
7503     if (pg.IsActive(vform, i)) {
7504       uint64_t element_address = addr.GetElementAddress(i, 0);
7505 
7506       if (type == kSVEFirstFaultLoad) {
7507         // First-faulting loads always load the first active element, regardless
7508         // of FFR. The result will be discarded if its FFR lane is inactive, but
7509         // it could still generate a fault.
7510         VIXL_DEFINE_OR_RETURN(mem_result,
7511                               MemReadUint(msize_in_bytes, element_address));
7512         value = mem_result;
7513         // All subsequent elements have non-fault semantics.
7514         type = kSVENonFaultLoad;
7515 
7516       } else if (ffr.IsActive(vform, i)) {
7517         // Simulation of fault-tolerant loads relies on system calls, and is
7518         // likely to be relatively slow, so we only actually perform the load if
7519         // its FFR lane is active.
7520 
7521         bool can_read = (i < fake_fault_at_lane) &&
7522                         CanReadMemory(element_address, msize_in_bytes);
7523         if (can_read) {
7524           VIXL_DEFINE_OR_RETURN(mem_result,
7525                                 MemReadUint(msize_in_bytes, element_address));
7526           value = mem_result;
7527         } else {
7528           // Propagate the fault to the end of FFR.
7529           for (int j = i; j < LaneCountFromFormat(vform); j++) {
7530             ffr.SetActive(vform, j, false);
7531           }
7532         }
7533       }
7534     }
7535 
7536     // The architecture permits a few possible results for inactive FFR lanes
7537     // (including those caused by a fault in this instruction). We choose to
7538     // leave the register value unchanged (like merging predication) because
7539     // no other input to this instruction can have the same behaviour.
7540     //
7541     // Note that this behaviour takes precedence over pg's zeroing predication.
7542 
7543     if (ffr.IsActive(vform, i)) {
7544       int msb = msize_in_bits - 1;
7545       if (is_signed) {
7546         zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7547       } else {
7548         zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7549       }
7550     }
7551   }
7552 
7553   if (ShouldTraceVRegs()) {
7554     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7555     if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7556       // Use an FP format where it's likely that we're accessing FP data.
7557       format = GetPrintRegisterFormatTryFP(format);
7558     }
7559     // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7560     // expects a single mask, so combine the two predicates.
7561     SimPRegister mask;
7562     SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7563     PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7564   }
7565 }
7566 
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7567 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7568                                                     VectorFormat vform,
7569                                                     SVEOffsetModifier mod) {
7570   bool is_signed = instr->ExtractBit(14) == 0;
7571   bool is_ff = instr->ExtractBit(13) == 1;
7572   // Note that these instructions don't use the Dtype encoding.
7573   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7574   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7575   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
7576   LogicSVEAddressVector addr(base,
7577                              &ReadVRegister(instr->GetRm()),
7578                              vform,
7579                              mod,
7580                              scale);
7581   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7582   if (is_ff) {
7583     SVEFaultTolerantLoadHelper(vform,
7584                                ReadPRegister(instr->GetPgLow8()),
7585                                instr->GetRt(),
7586                                addr,
7587                                kSVEFirstFaultLoad,
7588                                is_signed);
7589   } else {
7590     SVEStructuredLoadHelper(vform,
7591                             ReadPRegister(instr->GetPgLow8()),
7592                             instr->GetRt(),
7593                             addr,
7594                             is_signed);
7595   }
7596 }
7597 
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7598 int Simulator::GetFirstActive(VectorFormat vform,
7599                               const LogicPRegister& pg) const {
7600   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7601     if (pg.IsActive(vform, i)) return i;
7602   }
7603   return -1;
7604 }
7605 
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7606 int Simulator::GetLastActive(VectorFormat vform,
7607                              const LogicPRegister& pg) const {
7608   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7609     if (pg.IsActive(vform, i)) return i;
7610   }
7611   return -1;
7612 }
7613 
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7614 int Simulator::CountActiveLanes(VectorFormat vform,
7615                                 const LogicPRegister& pg) const {
7616   int count = 0;
7617   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7618     count += pg.IsActive(vform, i) ? 1 : 0;
7619   }
7620   return count;
7621 }
7622 
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7623 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7624                                        const LogicPRegister& pg,
7625                                        const LogicPRegister& pn) const {
7626   int count = 0;
7627   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7628     count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7629   }
7630   return count;
7631 }
7632 
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7633 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7634                                                int pattern) const {
7635   VIXL_ASSERT(IsSVEFormat(vform));
7636   int all = LaneCountFromFormat(vform);
7637   VIXL_ASSERT(all > 0);
7638 
7639   switch (pattern) {
7640     case SVE_VL1:
7641     case SVE_VL2:
7642     case SVE_VL3:
7643     case SVE_VL4:
7644     case SVE_VL5:
7645     case SVE_VL6:
7646     case SVE_VL7:
7647     case SVE_VL8:
7648       // VL1-VL8 are encoded directly.
7649       VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7650       VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7651       return (pattern <= all) ? pattern : 0;
7652     case SVE_VL16:
7653     case SVE_VL32:
7654     case SVE_VL64:
7655     case SVE_VL128:
7656     case SVE_VL256: {
7657       // VL16-VL256 are encoded as log2(N) + c.
7658       int min = 16 << (pattern - SVE_VL16);
7659       return (min <= all) ? min : 0;
7660     }
7661     // Special cases.
7662     case SVE_POW2:
7663       return 1 << HighestSetBitPosition(all);
7664     case SVE_MUL4:
7665       return all - (all % 4);
7666     case SVE_MUL3:
7667       return all - (all % 3);
7668     case SVE_ALL:
7669       return all;
7670   }
7671   // Unnamed cases architecturally return 0.
7672   return 0;
7673 }
7674 
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7675 LogicPRegister Simulator::match(VectorFormat vform,
7676                                 LogicPRegister dst,
7677                                 const LogicVRegister& haystack,
7678                                 const LogicVRegister& needles,
7679                                 bool negate_match) {
7680   SimVRegister ztemp;
7681   SimPRegister ptemp;
7682 
7683   pfalse(dst);
7684   int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7685   for (int i = 0; i < lanes_per_segment; i++) {
7686     dup_elements_to_segments(vform, ztemp, needles, i);
7687     SVEIntCompareVectorsHelper(eq,
7688                                vform,
7689                                ptemp,
7690                                GetPTrue(),
7691                                haystack,
7692                                ztemp,
7693                                false,
7694                                LeaveFlags);
7695     SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7696   }
7697   if (negate_match) {
7698     ptrue(vform, ptemp, SVE_ALL);
7699     SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7700   }
7701   return dst;
7702 }
7703 
GetStructAddress(int lane) const7704 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7705   if (IsContiguous()) {
7706     return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7707   }
7708 
7709   VIXL_ASSERT(IsScatterGather());
7710   VIXL_ASSERT(vector_ != NULL);
7711 
7712   // For scatter-gather accesses, we need to extract the offset from vector_,
7713   // and apply modifiers.
7714 
7715   uint64_t offset = 0;
7716   switch (vector_form_) {
7717     case kFormatVnS:
7718       offset = vector_->GetLane<uint32_t>(lane);
7719       break;
7720     case kFormatVnD:
7721       offset = vector_->GetLane<uint64_t>(lane);
7722       break;
7723     default:
7724       VIXL_UNIMPLEMENTED();
7725       break;
7726   }
7727 
7728   switch (vector_mod_) {
7729     case SVE_MUL_VL:
7730       VIXL_UNIMPLEMENTED();
7731       break;
7732     case SVE_LSL:
7733       // We apply the shift below. There's nothing to do here.
7734       break;
7735     case NO_SVE_OFFSET_MODIFIER:
7736       VIXL_ASSERT(vector_shift_ == 0);
7737       break;
7738     case SVE_UXTW:
7739       offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7740       break;
7741     case SVE_SXTW:
7742       offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7743       break;
7744   }
7745 
7746   return base_ + (offset << vector_shift_);
7747 }
7748 
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7749 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7750                                             LogicVRegister dst,
7751                                             const LogicVRegister& src) {
7752   SimVRegister zero;
7753   zero.Clear();
7754   return uzp2(vform, dst, src, zero);
7755 }
7756 
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7757 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7758                                              LogicVRegister dst,
7759                                              const LogicVRegister& src) {
7760   SimVRegister zero;
7761   zero.Clear();
7762   return uzp1(vform, dst, src, zero);
7763 }
7764 
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7765 LogicVRegister Simulator::adcl(VectorFormat vform,
7766                                LogicVRegister dst,
7767                                const LogicVRegister& src1,
7768                                const LogicVRegister& src2,
7769                                bool top) {
7770   unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7771   VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7772 
7773   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7774     uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7775     uint64_t right = dst.Uint(vform, i);
7776     unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7777     std::pair<uint64_t, uint8_t> val_and_flags =
7778         AddWithCarry(reg_size, left, right, carry_in);
7779 
7780     // Set even lanes to the result of the addition.
7781     dst.SetUint(vform, i, val_and_flags.first);
7782 
7783     // Set odd lanes to the carry flag from the addition.
7784     uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7785     dst.SetUint(vform, i + 1, carry_out);
7786   }
7787   return dst;
7788 }
7789 
7790 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7791 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7792 //
7793 // Matrices of the form:
7794 //
7795 //  src1 = ( a b c d e f g h )  src2 = ( A B )
7796 //         ( i j k l m n o p )         ( C D )
7797 //                                     ( E F )
7798 //                                     ( G H )
7799 //                                     ( I J )
7800 //                                     ( K L )
7801 //                                     ( M N )
7802 //                                     ( O P )
7803 //
7804 // Are stored in the input vector registers as:
7805 //
7806 //           15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
7807 //  src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7808 //  src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7809 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7810 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7811                                  LogicVRegister srcdst,
7812                                  const LogicVRegister& src1,
7813                                  const LogicVRegister& src2,
7814                                  bool src1_signed,
7815                                  bool src2_signed) {
7816   // Two destination forms are supported: Q register containing four S-sized
7817   // elements (4S) and Z register containing n S-sized elements (VnS).
7818   VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7819   VectorFormat vform_src = kFormatVnB;
7820   int b_per_segment = kQRegSize / kBRegSize;
7821   int s_per_segment = kQRegSize / kSRegSize;
7822   int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7823   int segment_count = LaneCountFromFormat(vform_dst) / 4;
7824   for (int seg = 0; seg < segment_count; seg++) {
7825     for (int i = 0; i < 2; i++) {
7826       for (int j = 0; j < 2; j++) {
7827         int dstidx = (2 * i) + j + (seg * s_per_segment);
7828         int64_t sum = srcdst.Int(vform_dst, dstidx);
7829         for (int k = 0; k < 8; k++) {
7830           int idx1 = (8 * i) + k + (seg * b_per_segment);
7831           int idx2 = (8 * j) + k + (seg * b_per_segment);
7832           int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7833                                    : src1.Uint(vform_src, idx1);
7834           int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7835                                    : src2.Uint(vform_src, idx2);
7836           sum += e1 * e2;
7837         }
7838         result[dstidx] = sum;
7839       }
7840     }
7841   }
7842   srcdst.SetIntArray(vform_dst, result);
7843   return srcdst;
7844 }
7845 
7846 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7847 // result to the matrix in srcdst, and write back to srcdst.
7848 //
7849 // Matrices of the form:
7850 //
7851 //  src1 = ( a b )  src2 = ( A B )
7852 //         ( c d )         ( C D )
7853 //
7854 // Are stored in the input vector registers as:
7855 //
7856 //           3   2   1   0
7857 //  src1 = [ d | c | b | a ]
7858 //  src2 = [ D | B | C | A ]
7859 //
7860 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7861 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7862                                   LogicVRegister srcdst,
7863                                   const LogicVRegister& src1,
7864                                   const LogicVRegister& src2) {
7865   T result[kZRegMaxSizeInBytes / sizeof(T)];
7866   int T_per_segment = 4;
7867   int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7868   for (int seg = 0; seg < segment_count; seg++) {
7869     int segoff = seg * T_per_segment;
7870     for (int i = 0; i < 2; i++) {
7871       for (int j = 0; j < 2; j++) {
7872         T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7873                             src2.Float<T>(2 * j + 0 + segoff));
7874         T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7875                             src2.Float<T>(2 * j + 1 + segoff));
7876         T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7877         result[2 * i + j + segoff] = FPAdd(sum, prod1);
7878       }
7879     }
7880   }
7881   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7882     // Elements outside a multiple of 4T are set to zero. This happens only
7883     // for double precision operations, when the VL is a multiple of 128 bits,
7884     // but not a multiple of 256 bits.
7885     T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7886     srcdst.SetFloat<T>(vform, i, value);
7887   }
7888   return srcdst;
7889 }
7890 
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7891 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7892                                   LogicVRegister dst,
7893                                   const LogicVRegister& src1,
7894                                   const LogicVRegister& src2) {
7895   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7896     fmatmul<float>(vform, dst, src1, src2);
7897   } else {
7898     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7899     fmatmul<double>(vform, dst, src1, src2);
7900   }
7901   return dst;
7902 }
7903 
7904 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7905 uint64_t CryptoOp<"choose"_h>(uint64_t x, uint64_t y, uint64_t z) {
7906   return ((y ^ z) & x) ^ z;
7907 }
7908 
7909 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7910 uint64_t CryptoOp<"majority"_h>(uint64_t x, uint64_t y, uint64_t z) {
7911   return (x & y) | ((x | y) & z);
7912 }
7913 
7914 template <>
CryptoOp(uint64_t x,uint64_t y,uint64_t z)7915 uint64_t CryptoOp<"parity"_h>(uint64_t x, uint64_t y, uint64_t z) {
7916   return x ^ y ^ z;
7917 }
7918 
7919 template <typename T, unsigned A, unsigned B, unsigned C>
SHASigma(uint64_t x)7920 static uint64_t SHASigma(uint64_t x) {
7921   return static_cast<T>(RotateRight(x, A, sizeof(T) * kBitsPerByte) ^
7922                         RotateRight(x, B, sizeof(T) * kBitsPerByte) ^
7923                         RotateRight(x, C, sizeof(T) * kBitsPerByte));
7924 }
7925 
sha2h(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool part1)7926 LogicVRegister Simulator::sha2h(LogicVRegister srcdst,
7927                                 const LogicVRegister& src1,
7928                                 const LogicVRegister& src2,
7929                                 bool part1) {
7930   uint64_t x[4] = {};
7931   uint64_t y[4] = {};
7932   if (part1) {
7933     // Switch input order based on which part is being handled.
7934     srcdst.UintArray(kFormat4S, x);
7935     src1.UintArray(kFormat4S, y);
7936   } else {
7937     src1.UintArray(kFormat4S, x);
7938     srcdst.UintArray(kFormat4S, y);
7939   }
7940 
7941   for (unsigned i = 0; i < ArrayLength(x); i++) {
7942     uint64_t chs = CryptoOp<"choose"_h>(y[0], y[1], y[2]);
7943     uint64_t maj = CryptoOp<"majority"_h>(x[0], x[1], x[2]);
7944 
7945     uint64_t w = src2.Uint(kFormat4S, i);
7946     uint64_t t = y[3] + SHASigma<uint32_t, 6, 11, 25>(y[0]) + chs + w;
7947 
7948     x[3] += t;
7949     y[3] = t + SHASigma<uint32_t, 2, 13, 22>(x[0]) + maj;
7950 
7951     // y:x = ROL(y:x, 32)
7952     SHARotateEltsLeftOne(x);
7953     SHARotateEltsLeftOne(y);
7954     std::swap(x[0], y[0]);
7955   }
7956 
7957   srcdst.SetUintArray(kFormat4S, part1 ? x : y);
7958   return srcdst;
7959 }
7960 
7961 template <typename T, unsigned A, unsigned B, unsigned C>
SHASURotate(uint64_t x)7962 static uint64_t SHASURotate(uint64_t x) {
7963   return RotateRight(x, A, sizeof(T) * kBitsPerByte) ^
7964          RotateRight(x, B, sizeof(T) * kBitsPerByte) ^
7965          ((x & ~static_cast<T>(0)) >> C);
7966 }
7967 
sha2su0(LogicVRegister srcdst,const LogicVRegister & src1)7968 LogicVRegister Simulator::sha2su0(LogicVRegister srcdst,
7969                                   const LogicVRegister& src1) {
7970   uint64_t w[4] = {};
7971   uint64_t result[4];
7972   srcdst.UintArray(kFormat4S, w);
7973   uint64_t x = src1.Uint(kFormat4S, 0);
7974 
7975   result[0] = SHASURotate<uint32_t, 7, 18, 3>(w[1]) + w[0];
7976   result[1] = SHASURotate<uint32_t, 7, 18, 3>(w[2]) + w[1];
7977   result[2] = SHASURotate<uint32_t, 7, 18, 3>(w[3]) + w[2];
7978   result[3] = SHASURotate<uint32_t, 7, 18, 3>(x) + w[3];
7979 
7980   srcdst.SetUintArray(kFormat4S, result);
7981   return srcdst;
7982 }
7983 
sha2su1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7984 LogicVRegister Simulator::sha2su1(LogicVRegister srcdst,
7985                                   const LogicVRegister& src1,
7986                                   const LogicVRegister& src2) {
7987   uint64_t w[4] = {};
7988   uint64_t x[4] = {};
7989   uint64_t y[4] = {};
7990   uint64_t result[4];
7991   srcdst.UintArray(kFormat4S, w);
7992   src1.UintArray(kFormat4S, x);
7993   src2.UintArray(kFormat4S, y);
7994 
7995   result[0] = SHASURotate<uint32_t, 17, 19, 10>(y[2]) + w[0] + x[1];
7996   result[1] = SHASURotate<uint32_t, 17, 19, 10>(y[3]) + w[1] + x[2];
7997   result[2] = SHASURotate<uint32_t, 17, 19, 10>(result[0]) + w[2] + x[3];
7998   result[3] = SHASURotate<uint32_t, 17, 19, 10>(result[1]) + w[3] + y[0];
7999 
8000   srcdst.SetUintArray(kFormat4S, result);
8001   return srcdst;
8002 }
8003 
sha512h(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8004 LogicVRegister Simulator::sha512h(LogicVRegister srcdst,
8005                                   const LogicVRegister& src1,
8006                                   const LogicVRegister& src2) {
8007   uint64_t w[2] = {};
8008   uint64_t x[2] = {};
8009   uint64_t y[2] = {};
8010   uint64_t result[2] = {};
8011   srcdst.UintArray(kFormat2D, w);
8012   src1.UintArray(kFormat2D, x);
8013   src2.UintArray(kFormat2D, y);
8014 
8015   result[1] = (y[1] & x[0]) ^ (~y[1] & x[1]);
8016   result[1] += SHASigma<uint64_t, 14, 18, 41>(y[1]) + w[1];
8017 
8018   uint64_t tmp = result[1] + y[0];
8019 
8020   result[0] = (tmp & y[1]) ^ (~tmp & x[0]);
8021   result[0] += SHASigma<uint64_t, 14, 18, 41>(tmp) + w[0];
8022 
8023   srcdst.SetUintArray(kFormat2D, result);
8024   return srcdst;
8025 }
8026 
sha512h2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8027 LogicVRegister Simulator::sha512h2(LogicVRegister srcdst,
8028                                    const LogicVRegister& src1,
8029                                    const LogicVRegister& src2) {
8030   uint64_t w[2] = {};
8031   uint64_t x[2] = {};
8032   uint64_t y[2] = {};
8033   uint64_t result[2] = {};
8034   srcdst.UintArray(kFormat2D, w);
8035   src1.UintArray(kFormat2D, x);
8036   src2.UintArray(kFormat2D, y);
8037 
8038   result[1] = (x[0] & y[1]) ^ (x[0] & y[0]) ^ (y[1] & y[0]);
8039   result[1] += SHASigma<uint64_t, 28, 34, 39>(y[0]) + w[1];
8040 
8041   result[0] = (result[1] & y[0]) ^ (result[1] & y[1]) ^ (y[1] & y[0]);
8042   result[0] += SHASigma<uint64_t, 28, 34, 39>(result[1]) + w[0];
8043 
8044   srcdst.SetUintArray(kFormat2D, result);
8045   return srcdst;
8046 }
8047 
sha512su0(LogicVRegister srcdst,const LogicVRegister & src1)8048 LogicVRegister Simulator::sha512su0(LogicVRegister srcdst,
8049                                     const LogicVRegister& src1) {
8050   uint64_t w[2] = {};
8051   uint64_t x[2] = {};
8052   uint64_t result[2] = {};
8053   srcdst.UintArray(kFormat2D, w);
8054   src1.UintArray(kFormat2D, x);
8055 
8056   result[0] = SHASURotate<uint64_t, 1, 8, 7>(w[1]) + w[0];
8057   result[1] = SHASURotate<uint64_t, 1, 8, 7>(x[0]) + w[1];
8058 
8059   srcdst.SetUintArray(kFormat2D, result);
8060   return srcdst;
8061 }
8062 
sha512su1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8063 LogicVRegister Simulator::sha512su1(LogicVRegister srcdst,
8064                                     const LogicVRegister& src1,
8065                                     const LogicVRegister& src2) {
8066   uint64_t w[2] = {};
8067   uint64_t x[2] = {};
8068   uint64_t y[2] = {};
8069   uint64_t result[2] = {};
8070   srcdst.UintArray(kFormat2D, w);
8071   src1.UintArray(kFormat2D, x);
8072   src2.UintArray(kFormat2D, y);
8073 
8074   result[1] = w[1] + SHASURotate<uint64_t, 19, 61, 6>(x[1]) + y[1];
8075   result[0] = w[0] + SHASURotate<uint64_t, 19, 61, 6>(x[0]) + y[0];
8076 
8077   srcdst.SetUintArray(kFormat2D, result);
8078   return srcdst;
8079 }
8080 
GalMul(int table,uint64_t x)8081 static uint8_t GalMul(int table, uint64_t x) {
8082   // Galois multiplication lookup tables.
8083   static const uint8_t ffmul02[256] = {
8084       0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16,
8085       0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
8086       0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46,
8087       0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
8088       0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76,
8089       0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e,
8090       0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6,
8091       0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
8092       0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6,
8093       0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee,
8094       0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1b, 0x19, 0x1f, 0x1d,
8095       0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
8096       0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d,
8097       0x23, 0x21, 0x27, 0x25, 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55,
8098       0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, 0x7b, 0x79, 0x7f, 0x7d,
8099       0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
8100       0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d,
8101       0x83, 0x81, 0x87, 0x85, 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5,
8102       0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, 0xdb, 0xd9, 0xdf, 0xdd,
8103       0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
8104       0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed,
8105       0xe3, 0xe1, 0xe7, 0xe5,
8106   };
8107 
8108   static const uint8_t ffmul03[256] = {
8109       0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d,
8110       0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39,
8111       0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65,
8112       0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
8113       0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d,
8114       0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9,
8115       0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5,
8116       0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
8117       0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd,
8118       0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99,
8119       0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9b, 0x98, 0x9d, 0x9e,
8120       0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
8121       0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6,
8122       0xbf, 0xbc, 0xb9, 0xba, 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2,
8123       0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, 0xcb, 0xc8, 0xcd, 0xce,
8124       0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
8125       0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46,
8126       0x4f, 0x4c, 0x49, 0x4a, 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62,
8127       0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, 0x3b, 0x38, 0x3d, 0x3e,
8128       0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
8129       0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16,
8130       0x1f, 0x1c, 0x19, 0x1a,
8131   };
8132 
8133   static const uint8_t ffmul09[256] = {
8134       0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53,
8135       0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf,
8136       0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3b, 0x32, 0x29, 0x20,
8137       0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
8138       0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8,
8139       0xc7, 0xce, 0xd5, 0xdc, 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49,
8140       0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, 0xe6, 0xef, 0xf4, 0xfd,
8141       0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
8142       0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e,
8143       0x21, 0x28, 0x33, 0x3a, 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2,
8144       0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, 0xec, 0xe5, 0xfe, 0xf7,
8145       0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
8146       0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f,
8147       0x10, 0x19, 0x02, 0x0b, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8,
8148       0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0x47, 0x4e, 0x55, 0x5c,
8149       0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
8150       0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9,
8151       0xf6, 0xff, 0xe4, 0xed, 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35,
8152       0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, 0xa1, 0xa8, 0xb3, 0xba,
8153       0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
8154       0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62,
8155       0x5d, 0x54, 0x4f, 0x46,
8156   };
8157 
8158   static const uint8_t ffmul0b[256] = {
8159       0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45,
8160       0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81,
8161       0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7b, 0x70, 0x6d, 0x66,
8162       0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
8163       0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e,
8164       0xbf, 0xb4, 0xa9, 0xa2, 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7,
8165       0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, 0x46, 0x4d, 0x50, 0x5b,
8166       0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
8167       0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8,
8168       0xf9, 0xf2, 0xef, 0xe4, 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c,
8169       0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, 0xf7, 0xfc, 0xe1, 0xea,
8170       0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
8171       0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02,
8172       0x33, 0x38, 0x25, 0x2e, 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd,
8173       0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, 0x3c, 0x37, 0x2a, 0x21,
8174       0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
8175       0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44,
8176       0x75, 0x7e, 0x63, 0x68, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80,
8177       0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0x7a, 0x71, 0x6c, 0x67,
8178       0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
8179       0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f,
8180       0xbe, 0xb5, 0xa8, 0xa3,
8181   };
8182 
8183   static const uint8_t ffmul0d[256] = {
8184       0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f,
8185       0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3,
8186       0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbb, 0xb6, 0xa1, 0xac,
8187       0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
8188       0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14,
8189       0x37, 0x3a, 0x2d, 0x20, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e,
8190       0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0xbd, 0xb0, 0xa7, 0xaa,
8191       0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
8192       0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9,
8193       0x8a, 0x87, 0x90, 0x9d, 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25,
8194       0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, 0xda, 0xd7, 0xc0, 0xcd,
8195       0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
8196       0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75,
8197       0x56, 0x5b, 0x4c, 0x41, 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42,
8198       0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, 0xb1, 0xbc, 0xab, 0xa6,
8199       0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
8200       0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8,
8201       0xeb, 0xe6, 0xf1, 0xfc, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44,
8202       0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0x0c, 0x01, 0x16, 0x1b,
8203       0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
8204       0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3,
8205       0x80, 0x8d, 0x9a, 0x97,
8206   };
8207 
8208   static const uint8_t ffmul0e[256] = {
8209       0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62,
8210       0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca,
8211       0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdb, 0xd5, 0xc7, 0xc9,
8212       0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
8213       0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59,
8214       0x73, 0x7d, 0x6f, 0x61, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87,
8215       0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0x4d, 0x43, 0x51, 0x5f,
8216       0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
8217       0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14,
8218       0x3e, 0x30, 0x22, 0x2c, 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc,
8219       0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, 0x41, 0x4f, 0x5d, 0x53,
8220       0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
8221       0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3,
8222       0xe9, 0xe7, 0xf5, 0xfb, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0,
8223       0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x7a, 0x74, 0x66, 0x68,
8224       0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
8225       0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e,
8226       0xa4, 0xaa, 0xb8, 0xb6, 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26,
8227       0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, 0x37, 0x39, 0x2b, 0x25,
8228       0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
8229       0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5,
8230       0x9f, 0x91, 0x83, 0x8d,
8231   };
8232 
8233   x &= 255;
8234   switch (table) {
8235     case 0x2:
8236       return ffmul02[x];
8237     case 0x3:
8238       return ffmul03[x];
8239     case 0x9:
8240       return ffmul09[x];
8241     case 0xb:
8242       return ffmul0b[x];
8243     case 0xd:
8244       return ffmul0d[x];
8245     case 0xe:
8246       return ffmul0e[x];
8247     case 0:
8248       // Case 0 indicates no table lookup, used for some forward mix stages.
8249       return static_cast<uint8_t>(x);
8250     default:
8251       VIXL_UNREACHABLE();
8252       return static_cast<uint8_t>(x);
8253   }
8254 }
8255 
8256 
AESMixInner(uint64_t * x,int stage,bool inverse)8257 static uint8_t AESMixInner(uint64_t* x, int stage, bool inverse) {
8258   VIXL_ASSERT(IsUint2(stage));
8259 
8260   int imc_gm[7] = {0xb, 0xd, 0x9, 0xe};
8261   int mc_gm[7] = {0x3, 0x0, 0x0, 0x2};
8262 
8263   int* gm = inverse ? imc_gm : mc_gm;
8264   int index = 3 - stage;
8265 
8266   uint8_t result = 0;
8267   for (int i = 0; i < 4; i++) {
8268     result ^= GalMul(gm[(index + i) % 4], x[i]);
8269   }
8270   return result;
8271 }
8272 
8273 
aesmix(LogicVRegister dst,const LogicVRegister & src,bool inverse)8274 LogicVRegister Simulator::aesmix(LogicVRegister dst,
8275                                  const LogicVRegister& src,
8276                                  bool inverse) {
8277   uint64_t in[16] = {};
8278   src.UintArray(kFormat16B, in);
8279   dst.ClearForWrite(kFormat16B);
8280 
8281   for (int c = 0; c < 16; c++) {
8282     int cmod4 = c % 4;
8283     int d = c - cmod4;
8284     VIXL_ASSERT((d == 0) || (d == 4) || (d == 8) || (d == 12));
8285     dst.SetUint(kFormat16B, c, AESMixInner(&in[d], cmod4, inverse));
8286   }
8287 
8288   return dst;
8289 }
8290 
aes(LogicVRegister dst,const LogicVRegister & src,bool decrypt)8291 LogicVRegister Simulator::aes(LogicVRegister dst,
8292                               const LogicVRegister& src,
8293                               bool decrypt) {
8294   dst.ClearForWrite(kFormat16B);
8295 
8296   // (Inverse) shift rows.
8297   uint8_t shift[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
8298   uint8_t shift_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3};
8299   for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) {
8300     uint8_t index = decrypt ? shift_inv[i] : shift[i];
8301     dst.SetUint(kFormat16B, i, src.Uint(kFormat16B, index));
8302   }
8303 
8304   // (Inverse) substitute bytes.
8305   static const uint8_t gf2[256] = {
8306       0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
8307       0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
8308       0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26,
8309       0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
8310       0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
8311       0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
8312       0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed,
8313       0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
8314       0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
8315       0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
8316       0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec,
8317       0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
8318       0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
8319       0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
8320       0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
8321       0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
8322       0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
8323       0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
8324       0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11,
8325       0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
8326       0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
8327       0xb0, 0x54, 0xbb, 0x16,
8328   };
8329   static const uint8_t gf2_inv[256] = {
8330       0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e,
8331       0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
8332       0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32,
8333       0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
8334       0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49,
8335       0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
8336       0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50,
8337       0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
8338       0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05,
8339       0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
8340       0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41,
8341       0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
8342       0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
8343       0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
8344       0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b,
8345       0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
8346       0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59,
8347       0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
8348       0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d,
8349       0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
8350       0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63,
8351       0x55, 0x21, 0x0c, 0x7d,
8352   };
8353 
8354   for (int i = 0; i < LaneCountFromFormat(kFormat16B); i++) {
8355     const uint8_t* table = decrypt ? gf2_inv : gf2;
8356     dst.SetUint(kFormat16B, i, table[dst.Uint(kFormat16B, i)]);
8357   }
8358   return dst;
8359 }
8360 
sm3partw1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8361 LogicVRegister Simulator::sm3partw1(LogicVRegister srcdst,
8362                                     const LogicVRegister& src1,
8363                                     const LogicVRegister& src2) {
8364   using namespace std::placeholders;
8365   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8366 
8367   SimVRegister temp;
8368 
8369   ext(kFormat16B, temp, src2, temp, 4);
8370   rol(kFormat4S, temp, temp, 15);
8371   eor(kFormat4S, temp, temp, src1);
8372   LogicVRegister r = eor(kFormat4S, temp, temp, srcdst);
8373 
8374   uint64_t result[4] = {};
8375   r.UintArray(kFormat4S, result);
8376   for (int i = 0; i < 4; i++) {
8377     if (i == 3) {
8378       // result[3] already contains srcdst[3] ^ src1[3] from the operations
8379       // above.
8380       result[i] ^= ROL(result[0], 15);
8381     }
8382     result[i] ^= ROL(result[i], 15) ^ ROL(result[i], 23);
8383   }
8384   srcdst.SetUintArray(kFormat4S, result);
8385   return srcdst;
8386 }
8387 
sm3partw2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)8388 LogicVRegister Simulator::sm3partw2(LogicVRegister srcdst,
8389                                     const LogicVRegister& src1,
8390                                     const LogicVRegister& src2) {
8391   using namespace std::placeholders;
8392   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8393 
8394   SimVRegister temp;
8395   VectorFormat vf = kFormat4S;
8396 
8397   rol(vf, temp, src2, 7);
8398   LogicVRegister r = eor(vf, temp, temp, src1);
8399   eor(vf, srcdst, temp, srcdst);
8400 
8401   uint64_t tmp2 = ROL(r.Uint(vf, 0), 15);
8402   tmp2 ^= ROL(tmp2, 15) ^ ROL(tmp2, 23);
8403   srcdst.SetUint(vf, 3, srcdst.Uint(vf, 3) ^ tmp2);
8404   return srcdst;
8405 }
8406 
sm3ss1(LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & src3)8407 LogicVRegister Simulator::sm3ss1(LogicVRegister dst,
8408                                  const LogicVRegister& src1,
8409                                  const LogicVRegister& src2,
8410                                  const LogicVRegister& src3) {
8411   using namespace std::placeholders;
8412   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8413 
8414   VectorFormat vf = kFormat4S;
8415   uint64_t result = ROL(src1.Uint(vf, 3), 12);
8416   result += src2.Uint(vf, 3) + src3.Uint(vf, 3);
8417   dst.Clear();
8418   dst.SetUint(vf, 3, ROL(result, 7));
8419   return dst;
8420 }
8421 
sm3tt1(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,int index,bool is_a)8422 LogicVRegister Simulator::sm3tt1(LogicVRegister srcdst,
8423                                  const LogicVRegister& src1,
8424                                  const LogicVRegister& src2,
8425                                  int index,
8426                                  bool is_a) {
8427   VectorFormat vf = kFormat4S;
8428   using namespace std::placeholders;
8429   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8430   auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);
8431 
8432   VIXL_ASSERT(IsUint2(index));
8433 
8434   uint64_t wjprime = src2.Uint(vf, index);
8435   uint64_t ss2 = src1.Uint(vf, 3) ^ ROL(sd(3), 12);
8436 
8437   uint64_t tt1;
8438   if (is_a) {
8439     tt1 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
8440   } else {
8441     tt1 = CryptoOp<"majority"_h>(sd(1), sd(2), sd(3));
8442   }
8443   tt1 += sd(0) + ss2 + wjprime;
8444 
8445   ext(kFormat16B, srcdst, srcdst, srcdst, 4);
8446   srcdst.SetUint(vf, 1, ROL(sd(1), 9));
8447   srcdst.SetUint(vf, 3, tt1);
8448   return srcdst;
8449 }
8450 
sm3tt2(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,int index,bool is_a)8451 LogicVRegister Simulator::sm3tt2(LogicVRegister srcdst,
8452                                  const LogicVRegister& src1,
8453                                  const LogicVRegister& src2,
8454                                  int index,
8455                                  bool is_a) {
8456   VectorFormat vf = kFormat4S;
8457   using namespace std::placeholders;
8458   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8459   auto sd = std::bind(&LogicVRegister::Uint, srcdst, vf, _1);
8460 
8461   VIXL_ASSERT(IsUint2(index));
8462 
8463   uint64_t wj = src2.Uint(vf, index);
8464 
8465   uint64_t tt2;
8466   if (is_a) {
8467     tt2 = CryptoOp<"parity"_h>(sd(1), sd(2), sd(3));
8468   } else {
8469     tt2 = CryptoOp<"choose"_h>(sd(3), sd(2), sd(1));
8470   }
8471   tt2 += sd(0) + src1.Uint(vf, 3) + wj;
8472 
8473   ext(kFormat16B, srcdst, srcdst, srcdst, 4);
8474   srcdst.SetUint(vf, 1, ROL(sd(1), 19));
8475   tt2 ^= ROL(tt2, 9) ^ ROL(tt2, 17);
8476   srcdst.SetUint(vf, 3, tt2);
8477   return srcdst;
8478 }
8479 
// Substitute each of the four low bytes of x through the SM4 substitution
// table and return them reassembled in the same byte positions.
//
// The table below is stored in reverse order, so input byte b is looked up at
// index (255 - b). Bits 32 and above of x are ignored, and the result always
// fits in 32 bits.
static uint64_t SM4SBox(uint64_t x) {
  static const uint8_t sbox[256] = {
      0x48, 0x39, 0xcb, 0xd7, 0x3e, 0x5f, 0xee, 0x79, 0x20, 0x4d, 0xdc, 0x3a,
      0xec, 0x7d, 0xf0, 0x18, 0x84, 0xc6, 0x6e, 0xc5, 0x09, 0xf1, 0xb9, 0x65,
      0x7e, 0x77, 0x96, 0x0c, 0x4a, 0x97, 0x69, 0x89, 0xb0, 0xb4, 0xe5, 0xb8,
      0x12, 0xd0, 0x74, 0x2d, 0xbd, 0x7b, 0xcd, 0xa5, 0x88, 0x31, 0xc1, 0x0a,
      0xd8, 0x5a, 0x10, 0x1f, 0x41, 0x5c, 0xd9, 0x11, 0x7f, 0xbc, 0xdd, 0xbb,
      0x92, 0xaf, 0x1b, 0x8d, 0x51, 0x5b, 0x6c, 0x6d, 0x72, 0x6a, 0xff, 0x03,
      0x2f, 0x8e, 0xfd, 0xde, 0x45, 0x37, 0xdb, 0xd5, 0x6f, 0x4e, 0x53, 0x0d,
      0xab, 0x23, 0x29, 0xc0, 0x60, 0xca, 0x66, 0x82, 0x2e, 0xe2, 0xf6, 0x1d,
      0xe3, 0xb1, 0x8c, 0xf5, 0x30, 0x32, 0x93, 0xad, 0x55, 0x1a, 0x34, 0x9b,
      0xa4, 0x5d, 0xae, 0xe0, 0xa1, 0x15, 0x61, 0xf9, 0xce, 0xf2, 0xf7, 0xa3,
      0xb5, 0x38, 0xc7, 0x40, 0xd2, 0x8a, 0xbf, 0xea, 0x9e, 0xc8, 0xc4, 0xa0,
      0xe7, 0x02, 0x36, 0x4c, 0x52, 0x27, 0xd3, 0x9f, 0x57, 0x46, 0x00, 0xd4,
      0x87, 0x78, 0x21, 0x01, 0x3b, 0x7c, 0x22, 0x25, 0xa2, 0xd1, 0x58, 0x63,
      0x5e, 0x0e, 0x24, 0x1e, 0x35, 0x9d, 0x56, 0x70, 0x4b, 0x0f, 0xeb, 0xf8,
      0x8b, 0xda, 0x64, 0x71, 0xb2, 0x81, 0x6b, 0x68, 0xa8, 0x4f, 0x85, 0xe6,
      0x19, 0x3c, 0x59, 0x83, 0xba, 0x17, 0x73, 0xf3, 0xfc, 0xa7, 0x07, 0x47,
      0xa6, 0x3f, 0x8f, 0x75, 0xfa, 0x94, 0xdf, 0x80, 0x95, 0xe8, 0x08, 0xc9,
      0xa9, 0x1c, 0xb3, 0xe4, 0x62, 0xac, 0xcf, 0xed, 0x43, 0x0b, 0x54, 0x33,
      0x7a, 0x98, 0xef, 0x91, 0xf4, 0x50, 0x42, 0x9c, 0x99, 0x06, 0x86, 0x49,
      0x26, 0x13, 0x44, 0xaa, 0xc3, 0x04, 0xbe, 0x2a, 0x76, 0x9a, 0x67, 0x2b,
      0x05, 0x2c, 0xfb, 0x28, 0xc2, 0x14, 0xb6, 0x16, 0xb7, 0x3d, 0xe1, 0xcc,
      0xfe, 0xe9, 0x90, 0xd6,
  };

  uint64_t substituted = 0;
  // Walk from the most significant of the four bytes down to the least, so
  // that shifting the accumulator keeps each byte in its original position.
  for (int byte = 3; byte >= 0; byte--) {
    const unsigned value = static_cast<unsigned>((x >> (8 * byte)) & 0xff);
    substituted <<= 8;
    substituted |= sbox[255 - value];
  }
  return substituted;
}
8512 
sm4(LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_key)8513 LogicVRegister Simulator::sm4(LogicVRegister srcdst,
8514                               const LogicVRegister& src1,
8515                               const LogicVRegister& src2,
8516                               bool is_key) {
8517   using namespace std::placeholders;
8518   auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
8519 
8520   VectorFormat vf = kFormat4S;
8521   uint64_t result[4] = {};
8522   if (is_key) {
8523     src1.UintArray(vf, result);
8524   } else {
8525     srcdst.UintArray(vf, result);
8526   }
8527 
8528   for (int i = 0; i < 4; i++) {
8529     uint64_t k = is_key ? src2.Uint(vf, i) : src1.Uint(vf, i);
8530     uint64_t intval = result[3] ^ result[2] ^ result[1] ^ k;
8531     intval = SM4SBox(intval);
8532 
8533     if (is_key) {
8534       intval ^= ROL(intval, 13) ^ ROL(intval, 23);
8535     } else {
8536       intval ^=
8537           ROL(intval, 2) ^ ROL(intval, 10) ^ ROL(intval, 18) ^ ROL(intval, 24);
8538     }
8539 
8540     intval ^= result[0];
8541 
8542     result[0] = result[1];
8543     result[1] = result[2];
8544     result[2] = result[3];
8545     result[3] = intval;
8546   }
8547   srcdst.SetUintArray(vf, result);
8548   return srcdst;
8549 }
8550 
8551 }  // namespace aarch64
8552 }  // namespace vixl
8553 
8554 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
8555