• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <cmath>
30 
31 #include "simulator-aarch64.h"
32 
33 namespace vixl {
34 namespace aarch64 {
35 
36 using vixl::internal::SimFloat16;
37 
// Type query: reports whether the template argument T is `double`.
// The primary template answers "no" for every type.
template <typename T>
bool IsFloat64() {
  return false;
}
// The explicit specialisation answers "yes" for `double` only.
template <>
bool IsFloat64<double>() {
  return true;
}
46 
// Type query: reports whether the template argument T is `float`.
// The primary template answers "no" for every type.
template <typename T>
bool IsFloat32() {
  return false;
}
// The explicit specialisation answers "yes" for `float` only.
template <>
bool IsFloat32<float>() {
  return true;
}
55 
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58   return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62   return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66   return true;
67 }
68 
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71   return kFP64DefaultNaN;
72 }
73 
74 
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77   return kFP32DefaultNaN;
78 }
79 
80 
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83   return SimFloat16(kFP16DefaultNaN);
84 }
85 
86 
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88   if (src >= 0) {
89     return UFixedToDouble(src, fbits, round);
90   } else if (src == INT64_MIN) {
91     return -UFixedToDouble(src, fbits, round);
92   } else {
93     return -UFixedToDouble(-src, fbits, round);
94   }
95 }
96 
97 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99   // An input of 0 is a special case because the result is effectively
100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101   if (src == 0) {
102     return 0.0;
103   }
104 
105   // Calculate the exponent. The highest significant bit will have the value
106   // 2^exponent.
107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
108   const int64_t exponent = highest_significant_bit - fbits;
109 
110   return FPRoundToDouble(0, exponent, src, round);
111 }
112 
113 
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115   if (src >= 0) {
116     return UFixedToFloat(src, fbits, round);
117   } else if (src == INT64_MIN) {
118     return -UFixedToFloat(src, fbits, round);
119   } else {
120     return -UFixedToFloat(-src, fbits, round);
121   }
122 }
123 
124 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126   // An input of 0 is a special case because the result is effectively
127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128   if (src == 0) {
129     return 0.0f;
130   }
131 
132   // Calculate the exponent. The highest significant bit will have the value
133   // 2^exponent.
134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
135   const int32_t exponent = highest_significant_bit - fbits;
136 
137   return FPRoundToFloat(0, exponent, src, round);
138 }
139 
140 
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142   if (src >= 0) {
143     return UFixedToFloat16(src, fbits, round);
144   } else if (src == INT64_MIN) {
145     return -UFixedToFloat16(src, fbits, round);
146   } else {
147     return -UFixedToFloat16(-src, fbits, round);
148   }
149 }
150 
151 
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153                                       int fbits,
154                                       FPRounding round) {
155   // An input of 0 is a special case because the result is effectively
156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157   if (src == 0) {
158     return 0.0f;
159   }
160 
161   // Calculate the exponent. The highest significant bit will have the value
162   // 2^exponent.
163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
164   const int16_t exponent = highest_significant_bit - fbits;
165 
166   return FPRoundToFloat16(0, exponent, src, round);
167 }
168 
169 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171   dst.ClearForWrite(vform);
172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173     LoadLane(dst, vform, i, addr);
174     addr += LaneSizeInBytesFromFormat(vform);
175   }
176 }
177 
178 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180                     LogicVRegister dst,
181                     int index,
182                     uint64_t addr) {
183   LoadLane(dst, vform, index, addr);
184 }
185 
186 
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188                      VectorFormat unpack_vform,
189                      LogicVRegister dst,
190                      uint64_t addr,
191                      bool is_signed) {
192   unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193   dst.ClearForWrite(vform);
194   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195     if (is_signed) {
196       LoadIntToLane(dst, vform, unpack_size, i, addr);
197     } else {
198       LoadUintToLane(dst, vform, unpack_size, i, addr);
199     }
200   }
201 }
202 
203 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205   ld1r(vform, vform, dst, addr);
206 }
207 
208 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210                     LogicVRegister dst1,
211                     LogicVRegister dst2,
212                     uint64_t addr1) {
213   dst1.ClearForWrite(vform);
214   dst2.ClearForWrite(vform);
215   int esize = LaneSizeInBytesFromFormat(vform);
216   uint64_t addr2 = addr1 + esize;
217   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218     LoadLane(dst1, vform, i, addr1);
219     LoadLane(dst2, vform, i, addr2);
220     addr1 += 2 * esize;
221     addr2 += 2 * esize;
222   }
223 }
224 
225 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227                     LogicVRegister dst1,
228                     LogicVRegister dst2,
229                     int index,
230                     uint64_t addr1) {
231   dst1.ClearForWrite(vform);
232   dst2.ClearForWrite(vform);
233   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234   LoadLane(dst1, vform, index, addr1);
235   LoadLane(dst2, vform, index, addr2);
236 }
237 
238 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240                      LogicVRegister dst1,
241                      LogicVRegister dst2,
242                      uint64_t addr) {
243   dst1.ClearForWrite(vform);
244   dst2.ClearForWrite(vform);
245   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247     LoadLane(dst1, vform, i, addr);
248     LoadLane(dst2, vform, i, addr2);
249   }
250 }
251 
252 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254                     LogicVRegister dst1,
255                     LogicVRegister dst2,
256                     LogicVRegister dst3,
257                     uint64_t addr1) {
258   dst1.ClearForWrite(vform);
259   dst2.ClearForWrite(vform);
260   dst3.ClearForWrite(vform);
261   int esize = LaneSizeInBytesFromFormat(vform);
262   uint64_t addr2 = addr1 + esize;
263   uint64_t addr3 = addr2 + esize;
264   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265     LoadLane(dst1, vform, i, addr1);
266     LoadLane(dst2, vform, i, addr2);
267     LoadLane(dst3, vform, i, addr3);
268     addr1 += 3 * esize;
269     addr2 += 3 * esize;
270     addr3 += 3 * esize;
271   }
272 }
273 
274 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276                     LogicVRegister dst1,
277                     LogicVRegister dst2,
278                     LogicVRegister dst3,
279                     int index,
280                     uint64_t addr1) {
281   dst1.ClearForWrite(vform);
282   dst2.ClearForWrite(vform);
283   dst3.ClearForWrite(vform);
284   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286   LoadLane(dst1, vform, index, addr1);
287   LoadLane(dst2, vform, index, addr2);
288   LoadLane(dst3, vform, index, addr3);
289 }
290 
291 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293                      LogicVRegister dst1,
294                      LogicVRegister dst2,
295                      LogicVRegister dst3,
296                      uint64_t addr) {
297   dst1.ClearForWrite(vform);
298   dst2.ClearForWrite(vform);
299   dst3.ClearForWrite(vform);
300   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303     LoadLane(dst1, vform, i, addr);
304     LoadLane(dst2, vform, i, addr2);
305     LoadLane(dst3, vform, i, addr3);
306   }
307 }
308 
309 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311                     LogicVRegister dst1,
312                     LogicVRegister dst2,
313                     LogicVRegister dst3,
314                     LogicVRegister dst4,
315                     uint64_t addr1) {
316   dst1.ClearForWrite(vform);
317   dst2.ClearForWrite(vform);
318   dst3.ClearForWrite(vform);
319   dst4.ClearForWrite(vform);
320   int esize = LaneSizeInBytesFromFormat(vform);
321   uint64_t addr2 = addr1 + esize;
322   uint64_t addr3 = addr2 + esize;
323   uint64_t addr4 = addr3 + esize;
324   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325     LoadLane(dst1, vform, i, addr1);
326     LoadLane(dst2, vform, i, addr2);
327     LoadLane(dst3, vform, i, addr3);
328     LoadLane(dst4, vform, i, addr4);
329     addr1 += 4 * esize;
330     addr2 += 4 * esize;
331     addr3 += 4 * esize;
332     addr4 += 4 * esize;
333   }
334 }
335 
336 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338                     LogicVRegister dst1,
339                     LogicVRegister dst2,
340                     LogicVRegister dst3,
341                     LogicVRegister dst4,
342                     int index,
343                     uint64_t addr1) {
344   dst1.ClearForWrite(vform);
345   dst2.ClearForWrite(vform);
346   dst3.ClearForWrite(vform);
347   dst4.ClearForWrite(vform);
348   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351   LoadLane(dst1, vform, index, addr1);
352   LoadLane(dst2, vform, index, addr2);
353   LoadLane(dst3, vform, index, addr3);
354   LoadLane(dst4, vform, index, addr4);
355 }
356 
357 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359                      LogicVRegister dst1,
360                      LogicVRegister dst2,
361                      LogicVRegister dst3,
362                      LogicVRegister dst4,
363                      uint64_t addr) {
364   dst1.ClearForWrite(vform);
365   dst2.ClearForWrite(vform);
366   dst3.ClearForWrite(vform);
367   dst4.ClearForWrite(vform);
368   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372     LoadLane(dst1, vform, i, addr);
373     LoadLane(dst2, vform, i, addr2);
374     LoadLane(dst3, vform, i, addr3);
375     LoadLane(dst4, vform, i, addr4);
376   }
377 }
378 
379 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382     StoreLane(src, vform, i, addr);
383     addr += LaneSizeInBytesFromFormat(vform);
384   }
385 }
386 
387 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389                     LogicVRegister src,
390                     int index,
391                     uint64_t addr) {
392   StoreLane(src, vform, index, addr);
393 }
394 
395 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397                     LogicVRegister src,
398                     LogicVRegister src2,
399                     uint64_t addr) {
400   int esize = LaneSizeInBytesFromFormat(vform);
401   uint64_t addr2 = addr + esize;
402   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403     StoreLane(src, vform, i, addr);
404     StoreLane(src2, vform, i, addr2);
405     addr += 2 * esize;
406     addr2 += 2 * esize;
407   }
408 }
409 
410 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412                     LogicVRegister src,
413                     LogicVRegister src2,
414                     int index,
415                     uint64_t addr) {
416   int esize = LaneSizeInBytesFromFormat(vform);
417   StoreLane(src, vform, index, addr);
418   StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420 
421 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423                     LogicVRegister src,
424                     LogicVRegister src2,
425                     LogicVRegister src3,
426                     uint64_t addr) {
427   int esize = LaneSizeInBytesFromFormat(vform);
428   uint64_t addr2 = addr + esize;
429   uint64_t addr3 = addr2 + esize;
430   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431     StoreLane(src, vform, i, addr);
432     StoreLane(src2, vform, i, addr2);
433     StoreLane(src3, vform, i, addr3);
434     addr += 3 * esize;
435     addr2 += 3 * esize;
436     addr3 += 3 * esize;
437   }
438 }
439 
440 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442                     LogicVRegister src,
443                     LogicVRegister src2,
444                     LogicVRegister src3,
445                     int index,
446                     uint64_t addr) {
447   int esize = LaneSizeInBytesFromFormat(vform);
448   StoreLane(src, vform, index, addr);
449   StoreLane(src2, vform, index, addr + 1 * esize);
450   StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452 
453 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455                     LogicVRegister src,
456                     LogicVRegister src2,
457                     LogicVRegister src3,
458                     LogicVRegister src4,
459                     uint64_t addr) {
460   int esize = LaneSizeInBytesFromFormat(vform);
461   uint64_t addr2 = addr + esize;
462   uint64_t addr3 = addr2 + esize;
463   uint64_t addr4 = addr3 + esize;
464   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465     StoreLane(src, vform, i, addr);
466     StoreLane(src2, vform, i, addr2);
467     StoreLane(src3, vform, i, addr3);
468     StoreLane(src4, vform, i, addr4);
469     addr += 4 * esize;
470     addr2 += 4 * esize;
471     addr3 += 4 * esize;
472     addr4 += 4 * esize;
473   }
474 }
475 
476 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478                     LogicVRegister src,
479                     LogicVRegister src2,
480                     LogicVRegister src3,
481                     LogicVRegister src4,
482                     int index,
483                     uint64_t addr) {
484   int esize = LaneSizeInBytesFromFormat(vform);
485   StoreLane(src, vform, index, addr);
486   StoreLane(src2, vform, index, addr + 1 * esize);
487   StoreLane(src3, vform, index, addr + 2 * esize);
488   StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490 
491 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493                               LogicVRegister dst,
494                               const LogicVRegister& src1,
495                               const LogicVRegister& src2,
496                               Condition cond) {
497   dst.ClearForWrite(vform);
498   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499     int64_t sa = src1.Int(vform, i);
500     int64_t sb = src2.Int(vform, i);
501     uint64_t ua = src1.Uint(vform, i);
502     uint64_t ub = src2.Uint(vform, i);
503     bool result = false;
504     switch (cond) {
505       case eq:
506         result = (ua == ub);
507         break;
508       case ge:
509         result = (sa >= sb);
510         break;
511       case gt:
512         result = (sa > sb);
513         break;
514       case hi:
515         result = (ua > ub);
516         break;
517       case hs:
518         result = (ua >= ub);
519         break;
520       case lt:
521         result = (sa < sb);
522         break;
523       case le:
524         result = (sa <= sb);
525         break;
526       default:
527         VIXL_UNREACHABLE();
528         break;
529     }
530     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531   }
532   return dst;
533 }
534 
535 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537                               LogicVRegister dst,
538                               const LogicVRegister& src1,
539                               int imm,
540                               Condition cond) {
541   SimVRegister temp;
542   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543   return cmp(vform, dst, src1, imm_reg, cond);
544 }
545 
546 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548                                  LogicVRegister dst,
549                                  const LogicVRegister& src1,
550                                  const LogicVRegister& src2) {
551   dst.ClearForWrite(vform);
552   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553     uint64_t ua = src1.Uint(vform, i);
554     uint64_t ub = src2.Uint(vform, i);
555     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556   }
557   return dst;
558 }
559 
560 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562                               LogicVRegister dst,
563                               const LogicVRegister& src1,
564                               const LogicVRegister& src2) {
565   int lane_size = LaneSizeInBitsFromFormat(vform);
566   dst.ClearForWrite(vform);
567 
568   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569     // Test for unsigned saturation.
570     uint64_t ua = src1.UintLeftJustified(vform, i);
571     uint64_t ub = src2.UintLeftJustified(vform, i);
572     uint64_t ur = ua + ub;
573     if (ur < ua) {
574       dst.SetUnsignedSat(i, true);
575     }
576 
577     // Test for signed saturation.
578     bool pos_a = (ua >> 63) == 0;
579     bool pos_b = (ub >> 63) == 0;
580     bool pos_r = (ur >> 63) == 0;
581     // If the signs of the operands are the same, but different from the result,
582     // there was an overflow.
583     if ((pos_a == pos_b) && (pos_a != pos_r)) {
584       dst.SetSignedSat(i, pos_a);
585     }
586     dst.SetInt(vform, i, ur >> (64 - lane_size));
587   }
588   return dst;
589 }
590 
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592                                    LogicVRegister dst,
593                                    const LogicVRegister& src1,
594                                    uint64_t value) {
595   int lane_size = LaneSizeInBitsFromFormat(vform);
596   VIXL_ASSERT(IsUintN(lane_size, value));
597   dst.ClearForWrite(vform);
598   // Left-justify `value`.
599   uint64_t ub = value << (64 - lane_size);
600   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601     // Test for unsigned saturation.
602     uint64_t ua = src1.UintLeftJustified(vform, i);
603     uint64_t ur = ua + ub;
604     if (ur < ua) {
605       dst.SetUnsignedSat(i, true);
606     }
607 
608     // Test for signed saturation.
609     // `value` is always positive, so we have an overflow if the (signed) result
610     // is smaller than the first operand.
611     if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612       dst.SetSignedSat(i, true);
613     }
614 
615     dst.SetInt(vform, i, ur >> (64 - lane_size));
616   }
617   return dst;
618 }
619 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621                                LogicVRegister dst,
622                                const LogicVRegister& src1,
623                                const LogicVRegister& src2) {
624   SimVRegister temp1, temp2;
625   uzp1(vform, temp1, src1, src2);
626   uzp2(vform, temp2, src1, src2);
627   add(vform, dst, temp1, temp2);
628   if (IsSVEFormat(vform)) {
629     interleave_top_bottom(vform, dst, dst);
630   }
631   return dst;
632 }
633 
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)634 LogicVRegister Simulator::sdiv(VectorFormat vform,
635                                LogicVRegister dst,
636                                const LogicVRegister& src1,
637                                const LogicVRegister& src2) {
638   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
639 
640   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
641     int64_t val1 = src1.Int(vform, i);
642     int64_t val2 = src2.Int(vform, i);
643     int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
644     int64_t quotient = 0;
645     if ((val1 == min_int) && (val2 == -1)) {
646       quotient = min_int;
647     } else if (val2 != 0) {
648       quotient = val1 / val2;
649     }
650     dst.SetInt(vform, i, quotient);
651   }
652 
653   return dst;
654 }
655 
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)656 LogicVRegister Simulator::udiv(VectorFormat vform,
657                                LogicVRegister dst,
658                                const LogicVRegister& src1,
659                                const LogicVRegister& src2) {
660   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
661 
662   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
663     uint64_t val1 = src1.Uint(vform, i);
664     uint64_t val2 = src2.Uint(vform, i);
665     uint64_t quotient = 0;
666     if (val2 != 0) {
667       quotient = val1 / val2;
668     }
669     dst.SetUint(vform, i, quotient);
670   }
671 
672   return dst;
673 }
674 
675 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::mla(VectorFormat vform,
677                               LogicVRegister dst,
678                               const LogicVRegister& srca,
679                               const LogicVRegister& src1,
680                               const LogicVRegister& src2) {
681   SimVRegister temp;
682   mul(vform, temp, src1, src2);
683   add(vform, dst, srca, temp);
684   return dst;
685 }
686 
687 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)688 LogicVRegister Simulator::mls(VectorFormat vform,
689                               LogicVRegister dst,
690                               const LogicVRegister& srca,
691                               const LogicVRegister& src1,
692                               const LogicVRegister& src2) {
693   SimVRegister temp;
694   mul(vform, temp, src1, src2);
695   sub(vform, dst, srca, temp);
696   return dst;
697 }
698 
699 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)700 LogicVRegister Simulator::mul(VectorFormat vform,
701                               LogicVRegister dst,
702                               const LogicVRegister& src1,
703                               const LogicVRegister& src2) {
704   dst.ClearForWrite(vform);
705 
706   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708   }
709   return dst;
710 }
711 
712 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)713 LogicVRegister Simulator::mul(VectorFormat vform,
714                               LogicVRegister dst,
715                               const LogicVRegister& src1,
716                               const LogicVRegister& src2,
717                               int index) {
718   SimVRegister temp;
719   VectorFormat indexform = VectorFormatFillQ(vform);
720   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
721 }
722 
723 
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)724 LogicVRegister Simulator::smulh(VectorFormat vform,
725                                 LogicVRegister dst,
726                                 const LogicVRegister& src1,
727                                 const LogicVRegister& src2) {
728   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
729     int64_t dst_val = 0xbadbeef;
730     int64_t val1 = src1.Int(vform, i);
731     int64_t val2 = src2.Int(vform, i);
732     switch (LaneSizeInBitsFromFormat(vform)) {
733       case 8:
734         dst_val = internal::MultiplyHigh<8>(val1, val2);
735         break;
736       case 16:
737         dst_val = internal::MultiplyHigh<16>(val1, val2);
738         break;
739       case 32:
740         dst_val = internal::MultiplyHigh<32>(val1, val2);
741         break;
742       case 64:
743         dst_val = internal::MultiplyHigh<64>(val1, val2);
744         break;
745       default:
746         VIXL_UNREACHABLE();
747         break;
748     }
749     dst.SetInt(vform, i, dst_val);
750   }
751   return dst;
752 }
753 
754 
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)755 LogicVRegister Simulator::umulh(VectorFormat vform,
756                                 LogicVRegister dst,
757                                 const LogicVRegister& src1,
758                                 const LogicVRegister& src2) {
759   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760     uint64_t dst_val = 0xbadbeef;
761     uint64_t val1 = src1.Uint(vform, i);
762     uint64_t val2 = src2.Uint(vform, i);
763     switch (LaneSizeInBitsFromFormat(vform)) {
764       case 8:
765         dst_val = internal::MultiplyHigh<8>(val1, val2);
766         break;
767       case 16:
768         dst_val = internal::MultiplyHigh<16>(val1, val2);
769         break;
770       case 32:
771         dst_val = internal::MultiplyHigh<32>(val1, val2);
772         break;
773       case 64:
774         dst_val = internal::MultiplyHigh<64>(val1, val2);
775         break;
776       default:
777         VIXL_UNREACHABLE();
778         break;
779     }
780     dst.SetUint(vform, i, dst_val);
781   }
782   return dst;
783 }
784 
785 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)786 LogicVRegister Simulator::mla(VectorFormat vform,
787                               LogicVRegister dst,
788                               const LogicVRegister& src1,
789                               const LogicVRegister& src2,
790                               int index) {
791   SimVRegister temp;
792   VectorFormat indexform = VectorFormatFillQ(vform);
793   return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795 
796 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)797 LogicVRegister Simulator::mls(VectorFormat vform,
798                               LogicVRegister dst,
799                               const LogicVRegister& src1,
800                               const LogicVRegister& src2,
801                               int index) {
802   SimVRegister temp;
803   VectorFormat indexform = VectorFormatFillQ(vform);
804   return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::sqdmull(VectorFormat vform,
808                                   LogicVRegister dst,
809                                   const LogicVRegister& src1,
810                                   const LogicVRegister& src2,
811                                   int index) {
812   SimVRegister temp;
813   VectorFormat indexform =
814       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)818 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
819                                   LogicVRegister dst,
820                                   const LogicVRegister& src1,
821                                   const LogicVRegister& src2,
822                                   int index) {
823   SimVRegister temp;
824   VectorFormat indexform =
825       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
826   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
827 }
828 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)829 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
830                                   LogicVRegister dst,
831                                   const LogicVRegister& src1,
832                                   const LogicVRegister& src2,
833                                   int index) {
834   SimVRegister temp;
835   VectorFormat indexform =
836       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
837   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
838 }
839 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)840 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
841                                   LogicVRegister dst,
842                                   const LogicVRegister& src1,
843                                   const LogicVRegister& src2,
844                                   int index) {
845   SimVRegister temp;
846   VectorFormat indexform = VectorFormatFillQ(vform);
847   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849 
850 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)851 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
852                                    LogicVRegister dst,
853                                    const LogicVRegister& src1,
854                                    const LogicVRegister& src2,
855                                    int index) {
856   SimVRegister temp;
857   VectorFormat indexform = VectorFormatFillQ(vform);
858   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860 
861 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
863                                    LogicVRegister dst,
864                                    const LogicVRegister& src1,
865                                    const LogicVRegister& src2,
866                                    int index) {
867   SimVRegister temp;
868   VectorFormat indexform = VectorFormatFillQ(vform);
869   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
870 }
871 
872 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
874                                    LogicVRegister dst,
875                                    const LogicVRegister& src1,
876                                    const LogicVRegister& src2,
877                                    int index) {
878   SimVRegister temp;
879   VectorFormat indexform = VectorFormatFillQ(vform);
880   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881 }
882 
883 
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const884 uint64_t Simulator::PolynomialMult(uint64_t op1,
885                                    uint64_t op2,
886                                    int lane_size_in_bits) const {
887   VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
888   VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
889   VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
890   uint64_t result = 0;
891   for (int i = 0; i < lane_size_in_bits; ++i) {
892     if ((op1 >> i) & 1) {
893       result = result ^ (op2 << i);
894     }
895   }
896   return result;
897 }
898 
899 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)900 LogicVRegister Simulator::pmul(VectorFormat vform,
901                                LogicVRegister dst,
902                                const LogicVRegister& src1,
903                                const LogicVRegister& src2) {
904   dst.ClearForWrite(vform);
905   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
906     dst.SetUint(vform,
907                 i,
908                 PolynomialMult(src1.Uint(vform, i),
909                                src2.Uint(vform, i),
910                                LaneSizeInBitsFromFormat(vform)));
911   }
912   return dst;
913 }
914 
915 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)916 LogicVRegister Simulator::pmull(VectorFormat vform,
917                                 LogicVRegister dst,
918                                 const LogicVRegister& src1,
919                                 const LogicVRegister& src2) {
920   dst.ClearForWrite(vform);
921 
922   VectorFormat vform_src = VectorFormatHalfWidth(vform);
923   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
924     dst.SetUint(vform,
925                 i,
926                 PolynomialMult(src1.Uint(vform_src, i),
927                                src2.Uint(vform_src, i),
928                                LaneSizeInBitsFromFormat(vform_src)));
929   }
930 
931   return dst;
932 }
933 
934 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull2(VectorFormat vform,
936                                  LogicVRegister dst,
937                                  const LogicVRegister& src1,
938                                  const LogicVRegister& src2) {
939   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
940   dst.ClearForWrite(vform);
941   int lane_count = LaneCountFromFormat(vform);
942   for (int i = 0; i < lane_count; i++) {
943     dst.SetUint(vform,
944                 i,
945                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
946                                src2.Uint(vform_src, lane_count + i),
947                                LaneSizeInBitsFromFormat(vform_src)));
948   }
949   return dst;
950 }
951 
952 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)953 LogicVRegister Simulator::sub(VectorFormat vform,
954                               LogicVRegister dst,
955                               const LogicVRegister& src1,
956                               const LogicVRegister& src2) {
957   int lane_size = LaneSizeInBitsFromFormat(vform);
958   dst.ClearForWrite(vform);
959   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
960     // Test for unsigned saturation.
961     uint64_t ua = src1.UintLeftJustified(vform, i);
962     uint64_t ub = src2.UintLeftJustified(vform, i);
963     uint64_t ur = ua - ub;
964     if (ub > ua) {
965       dst.SetUnsignedSat(i, false);
966     }
967 
968     // Test for signed saturation.
969     bool pos_a = (ua >> 63) == 0;
970     bool pos_b = (ub >> 63) == 0;
971     bool pos_r = (ur >> 63) == 0;
972     // If the signs of the operands are different, and the sign of the first
973     // operand doesn't match the result, there was an overflow.
974     if ((pos_a != pos_b) && (pos_a != pos_r)) {
975       dst.SetSignedSat(i, pos_a);
976     }
977 
978     dst.SetInt(vform, i, ur >> (64 - lane_size));
979   }
980   return dst;
981 }
982 
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)983 LogicVRegister Simulator::sub_uint(VectorFormat vform,
984                                    LogicVRegister dst,
985                                    const LogicVRegister& src1,
986                                    uint64_t value) {
987   int lane_size = LaneSizeInBitsFromFormat(vform);
988   VIXL_ASSERT(IsUintN(lane_size, value));
989   dst.ClearForWrite(vform);
990   // Left-justify `value`.
991   uint64_t ub = value << (64 - lane_size);
992   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
993     // Test for unsigned saturation.
994     uint64_t ua = src1.UintLeftJustified(vform, i);
995     uint64_t ur = ua - ub;
996     if (ub > ua) {
997       dst.SetUnsignedSat(i, false);
998     }
999 
1000     // Test for signed saturation.
1001     // `value` is always positive, so we have an overflow if the (signed) result
1002     // is greater than the first operand.
1003     if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1004       dst.SetSignedSat(i, false);
1005     }
1006 
1007     dst.SetInt(vform, i, ur >> (64 - lane_size));
1008   }
1009   return dst;
1010 }
1011 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012 LogicVRegister Simulator::and_(VectorFormat vform,
1013                                LogicVRegister dst,
1014                                const LogicVRegister& src1,
1015                                const LogicVRegister& src2) {
1016   dst.ClearForWrite(vform);
1017   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1018     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1019   }
1020   return dst;
1021 }
1022 
1023 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1024 LogicVRegister Simulator::orr(VectorFormat vform,
1025                               LogicVRegister dst,
1026                               const LogicVRegister& src1,
1027                               const LogicVRegister& src2) {
1028   dst.ClearForWrite(vform);
1029   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1030     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1031   }
1032   return dst;
1033 }
1034 
1035 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1036 LogicVRegister Simulator::orn(VectorFormat vform,
1037                               LogicVRegister dst,
1038                               const LogicVRegister& src1,
1039                               const LogicVRegister& src2) {
1040   dst.ClearForWrite(vform);
1041   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1042     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1043   }
1044   return dst;
1045 }
1046 
1047 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1048 LogicVRegister Simulator::eor(VectorFormat vform,
1049                               LogicVRegister dst,
1050                               const LogicVRegister& src1,
1051                               const LogicVRegister& src2) {
1052   dst.ClearForWrite(vform);
1053   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1054     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1055   }
1056   return dst;
1057 }
1058 
1059 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1060 LogicVRegister Simulator::bic(VectorFormat vform,
1061                               LogicVRegister dst,
1062                               const LogicVRegister& src1,
1063                               const LogicVRegister& src2) {
1064   dst.ClearForWrite(vform);
1065   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1066     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1067   }
1068   return dst;
1069 }
1070 
1071 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1072 LogicVRegister Simulator::bic(VectorFormat vform,
1073                               LogicVRegister dst,
1074                               const LogicVRegister& src,
1075                               uint64_t imm) {
1076   uint64_t result[16];
1077   int lane_count = LaneCountFromFormat(vform);
1078   for (int i = 0; i < lane_count; ++i) {
1079     result[i] = src.Uint(vform, i) & ~imm;
1080   }
1081   dst.ClearForWrite(vform);
1082   for (int i = 0; i < lane_count; ++i) {
1083     dst.SetUint(vform, i, result[i]);
1084   }
1085   return dst;
1086 }
1087 
1088 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1089 LogicVRegister Simulator::bif(VectorFormat vform,
1090                               LogicVRegister dst,
1091                               const LogicVRegister& src1,
1092                               const LogicVRegister& src2) {
1093   dst.ClearForWrite(vform);
1094   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1095     uint64_t operand1 = dst.Uint(vform, i);
1096     uint64_t operand2 = ~src2.Uint(vform, i);
1097     uint64_t operand3 = src1.Uint(vform, i);
1098     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1099     dst.SetUint(vform, i, result);
1100   }
1101   return dst;
1102 }
1103 
1104 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1105 LogicVRegister Simulator::bit(VectorFormat vform,
1106                               LogicVRegister dst,
1107                               const LogicVRegister& src1,
1108                               const LogicVRegister& src2) {
1109   dst.ClearForWrite(vform);
1110   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1111     uint64_t operand1 = dst.Uint(vform, i);
1112     uint64_t operand2 = src2.Uint(vform, i);
1113     uint64_t operand3 = src1.Uint(vform, i);
1114     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1115     dst.SetUint(vform, i, result);
1116   }
1117   return dst;
1118 }
1119 
1120 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1121 LogicVRegister Simulator::bsl(VectorFormat vform,
1122                               LogicVRegister dst,
1123                               const LogicVRegister& src_mask,
1124                               const LogicVRegister& src1,
1125                               const LogicVRegister& src2) {
1126   dst.ClearForWrite(vform);
1127   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128     uint64_t operand1 = src2.Uint(vform, i);
1129     uint64_t operand2 = src_mask.Uint(vform, i);
1130     uint64_t operand3 = src1.Uint(vform, i);
1131     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132     dst.SetUint(vform, i, result);
1133   }
1134   return dst;
1135 }
1136 
1137 
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1138 LogicVRegister Simulator::sminmax(VectorFormat vform,
1139                                   LogicVRegister dst,
1140                                   const LogicVRegister& src1,
1141                                   const LogicVRegister& src2,
1142                                   bool max) {
1143   dst.ClearForWrite(vform);
1144   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145     int64_t src1_val = src1.Int(vform, i);
1146     int64_t src2_val = src2.Int(vform, i);
1147     int64_t dst_val;
1148     if (max) {
1149       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1150     } else {
1151       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1152     }
1153     dst.SetInt(vform, i, dst_val);
1154   }
1155   return dst;
1156 }
1157 
1158 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::smax(VectorFormat vform,
1160                                LogicVRegister dst,
1161                                const LogicVRegister& src1,
1162                                const LogicVRegister& src2) {
1163   return sminmax(vform, dst, src1, src2, true);
1164 }
1165 
1166 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1167 LogicVRegister Simulator::smin(VectorFormat vform,
1168                                LogicVRegister dst,
1169                                const LogicVRegister& src1,
1170                                const LogicVRegister& src2) {
1171   return sminmax(vform, dst, src1, src2, false);
1172 }
1173 
1174 
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1175 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1176                                    LogicVRegister dst,
1177                                    const LogicVRegister& src1,
1178                                    const LogicVRegister& src2,
1179                                    bool max) {
1180   unsigned lanes = LaneCountFromFormat(vform);
1181   int64_t result[kZRegMaxSizeInBytes];
1182   const LogicVRegister* src = &src1;
1183   for (unsigned j = 0; j < 2; j++) {
1184     for (unsigned i = 0; i < lanes; i += 2) {
1185       int64_t first_val = src->Int(vform, i);
1186       int64_t second_val = src->Int(vform, i + 1);
1187       int64_t dst_val;
1188       if (max) {
1189         dst_val = (first_val > second_val) ? first_val : second_val;
1190       } else {
1191         dst_val = (first_val < second_val) ? first_val : second_val;
1192       }
1193       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1194       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1195     }
1196     src = &src2;
1197   }
1198   dst.SetIntArray(vform, result);
1199   if (IsSVEFormat(vform)) {
1200     interleave_top_bottom(vform, dst, dst);
1201   }
1202   return dst;
1203 }
1204 
1205 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206 LogicVRegister Simulator::smaxp(VectorFormat vform,
1207                                 LogicVRegister dst,
1208                                 const LogicVRegister& src1,
1209                                 const LogicVRegister& src2) {
1210   return sminmaxp(vform, dst, src1, src2, true);
1211 }
1212 
1213 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1214 LogicVRegister Simulator::sminp(VectorFormat vform,
1215                                 LogicVRegister dst,
1216                                 const LogicVRegister& src1,
1217                                 const LogicVRegister& src2) {
1218   return sminmaxp(vform, dst, src1, src2, false);
1219 }
1220 
1221 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1222 LogicVRegister Simulator::addp(VectorFormat vform,
1223                                LogicVRegister dst,
1224                                const LogicVRegister& src) {
1225   VIXL_ASSERT(vform == kFormatD);
1226 
1227   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1228   dst.ClearForWrite(vform);
1229   dst.SetUint(vform, 0, dst_val);
1230   return dst;
1231 }
1232 
1233 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1234 LogicVRegister Simulator::addv(VectorFormat vform,
1235                                LogicVRegister dst,
1236                                const LogicVRegister& src) {
1237   VectorFormat vform_dst =
1238       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1239 
1240 
1241   int64_t dst_val = 0;
1242   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1243     dst_val += src.Int(vform, i);
1244   }
1245 
1246   dst.ClearForWrite(vform_dst);
1247   dst.SetInt(vform_dst, 0, dst_val);
1248   return dst;
1249 }
1250 
1251 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1252 LogicVRegister Simulator::saddlv(VectorFormat vform,
1253                                  LogicVRegister dst,
1254                                  const LogicVRegister& src) {
1255   VectorFormat vform_dst =
1256       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1257 
1258   int64_t dst_val = 0;
1259   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260     dst_val += src.Int(vform, i);
1261   }
1262 
1263   dst.ClearForWrite(vform_dst);
1264   dst.SetInt(vform_dst, 0, dst_val);
1265   return dst;
1266 }
1267 
1268 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1270                                  LogicVRegister dst,
1271                                  const LogicVRegister& src) {
1272   VectorFormat vform_dst =
1273       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1274 
1275   uint64_t dst_val = 0;
1276   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1277     dst_val += src.Uint(vform, i);
1278   }
1279 
1280   dst.ClearForWrite(vform_dst);
1281   dst.SetUint(vform_dst, 0, dst_val);
1282   return dst;
1283 }
1284 
1285 
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1286 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1287                                    LogicVRegister dst,
1288                                    const LogicPRegister& pg,
1289                                    const LogicVRegister& src,
1290                                    bool max) {
1291   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1292   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1293     if (!pg.IsActive(vform, i)) continue;
1294 
1295     int64_t src_val = src.Int(vform, i);
1296     if (max) {
1297       dst_val = (src_val > dst_val) ? src_val : dst_val;
1298     } else {
1299       dst_val = (src_val < dst_val) ? src_val : dst_val;
1300     }
1301   }
1302   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1303   dst.SetInt(vform, 0, dst_val);
1304   return dst;
1305 }
1306 
1307 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1308 LogicVRegister Simulator::smaxv(VectorFormat vform,
1309                                 LogicVRegister dst,
1310                                 const LogicVRegister& src) {
1311   sminmaxv(vform, dst, GetPTrue(), src, true);
1312   return dst;
1313 }
1314 
1315 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1316 LogicVRegister Simulator::sminv(VectorFormat vform,
1317                                 LogicVRegister dst,
1318                                 const LogicVRegister& src) {
1319   sminmaxv(vform, dst, GetPTrue(), src, false);
1320   return dst;
1321 }
1322 
1323 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1324 LogicVRegister Simulator::smaxv(VectorFormat vform,
1325                                 LogicVRegister dst,
1326                                 const LogicPRegister& pg,
1327                                 const LogicVRegister& src) {
1328   VIXL_ASSERT(IsSVEFormat(vform));
1329   sminmaxv(vform, dst, pg, src, true);
1330   return dst;
1331 }
1332 
1333 
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1334 LogicVRegister Simulator::sminv(VectorFormat vform,
1335                                 LogicVRegister dst,
1336                                 const LogicPRegister& pg,
1337                                 const LogicVRegister& src) {
1338   VIXL_ASSERT(IsSVEFormat(vform));
1339   sminmaxv(vform, dst, pg, src, false);
1340   return dst;
1341 }
1342 
1343 
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::uminmax(VectorFormat vform,
1345                                   LogicVRegister dst,
1346                                   const LogicVRegister& src1,
1347                                   const LogicVRegister& src2,
1348                                   bool max) {
1349   dst.ClearForWrite(vform);
1350   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351     uint64_t src1_val = src1.Uint(vform, i);
1352     uint64_t src2_val = src2.Uint(vform, i);
1353     uint64_t dst_val;
1354     if (max) {
1355       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356     } else {
1357       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358     }
1359     dst.SetUint(vform, i, dst_val);
1360   }
1361   return dst;
1362 }
1363 
1364 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::umax(VectorFormat vform,
1366                                LogicVRegister dst,
1367                                const LogicVRegister& src1,
1368                                const LogicVRegister& src2) {
1369   return uminmax(vform, dst, src1, src2, true);
1370 }
1371 
1372 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::umin(VectorFormat vform,
1374                                LogicVRegister dst,
1375                                const LogicVRegister& src1,
1376                                const LogicVRegister& src2) {
1377   return uminmax(vform, dst, src1, src2, false);
1378 }
1379 
1380 
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1382                                    LogicVRegister dst,
1383                                    const LogicVRegister& src1,
1384                                    const LogicVRegister& src2,
1385                                    bool max) {
1386   unsigned lanes = LaneCountFromFormat(vform);
1387   uint64_t result[kZRegMaxSizeInBytes];
1388   const LogicVRegister* src = &src1;
1389   for (unsigned j = 0; j < 2; j++) {
1390     for (unsigned i = 0; i < lanes; i += 2) {
1391       uint64_t first_val = src->Uint(vform, i);
1392       uint64_t second_val = src->Uint(vform, i + 1);
1393       uint64_t dst_val;
1394       if (max) {
1395         dst_val = (first_val > second_val) ? first_val : second_val;
1396       } else {
1397         dst_val = (first_val < second_val) ? first_val : second_val;
1398       }
1399       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1400       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401     }
1402     src = &src2;
1403   }
1404   dst.SetUintArray(vform, result);
1405   if (IsSVEFormat(vform)) {
1406     interleave_top_bottom(vform, dst, dst);
1407   }
1408   return dst;
1409 }
1410 
1411 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1412 LogicVRegister Simulator::umaxp(VectorFormat vform,
1413                                 LogicVRegister dst,
1414                                 const LogicVRegister& src1,
1415                                 const LogicVRegister& src2) {
1416   return uminmaxp(vform, dst, src1, src2, true);
1417 }
1418 
1419 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1420 LogicVRegister Simulator::uminp(VectorFormat vform,
1421                                 LogicVRegister dst,
1422                                 const LogicVRegister& src1,
1423                                 const LogicVRegister& src2) {
1424   return uminmaxp(vform, dst, src1, src2, false);
1425 }
1426 
1427 
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1428 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1429                                    LogicVRegister dst,
1430                                    const LogicPRegister& pg,
1431                                    const LogicVRegister& src,
1432                                    bool max) {
1433   uint64_t dst_val = max ? 0 : UINT64_MAX;
1434   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1435     if (!pg.IsActive(vform, i)) continue;
1436 
1437     uint64_t src_val = src.Uint(vform, i);
1438     if (max) {
1439       dst_val = (src_val > dst_val) ? src_val : dst_val;
1440     } else {
1441       dst_val = (src_val < dst_val) ? src_val : dst_val;
1442     }
1443   }
1444   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1445   dst.SetUint(vform, 0, dst_val);
1446   return dst;
1447 }
1448 
1449 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1450 LogicVRegister Simulator::umaxv(VectorFormat vform,
1451                                 LogicVRegister dst,
1452                                 const LogicVRegister& src) {
1453   uminmaxv(vform, dst, GetPTrue(), src, true);
1454   return dst;
1455 }
1456 
1457 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1458 LogicVRegister Simulator::uminv(VectorFormat vform,
1459                                 LogicVRegister dst,
1460                                 const LogicVRegister& src) {
1461   uminmaxv(vform, dst, GetPTrue(), src, false);
1462   return dst;
1463 }
1464 
1465 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1466 LogicVRegister Simulator::umaxv(VectorFormat vform,
1467                                 LogicVRegister dst,
1468                                 const LogicPRegister& pg,
1469                                 const LogicVRegister& src) {
1470   VIXL_ASSERT(IsSVEFormat(vform));
1471   uminmaxv(vform, dst, pg, src, true);
1472   return dst;
1473 }
1474 
1475 
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1476 LogicVRegister Simulator::uminv(VectorFormat vform,
1477                                 LogicVRegister dst,
1478                                 const LogicPRegister& pg,
1479                                 const LogicVRegister& src) {
1480   VIXL_ASSERT(IsSVEFormat(vform));
1481   uminmaxv(vform, dst, pg, src, false);
1482   return dst;
1483 }
1484 
1485 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::shl(VectorFormat vform,
1487                               LogicVRegister dst,
1488                               const LogicVRegister& src,
1489                               int shift) {
1490   VIXL_ASSERT(shift >= 0);
1491   SimVRegister temp;
1492   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1493   return ushl(vform, dst, src, shiftreg);
1494 }
1495 
1496 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1497 LogicVRegister Simulator::sshll(VectorFormat vform,
1498                                 LogicVRegister dst,
1499                                 const LogicVRegister& src,
1500                                 int shift) {
1501   VIXL_ASSERT(shift >= 0);
1502   SimVRegister temp1, temp2;
1503   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1504   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1505   return sshl(vform, dst, extendedreg, shiftreg);
1506 }
1507 
1508 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1509 LogicVRegister Simulator::sshll2(VectorFormat vform,
1510                                  LogicVRegister dst,
1511                                  const LogicVRegister& src,
1512                                  int shift) {
1513   VIXL_ASSERT(shift >= 0);
1514   SimVRegister temp1, temp2;
1515   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1516   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1517   return sshl(vform, dst, extendedreg, shiftreg);
1518 }
1519 
1520 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1521 LogicVRegister Simulator::shll(VectorFormat vform,
1522                                LogicVRegister dst,
1523                                const LogicVRegister& src) {
1524   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1525   return sshll(vform, dst, src, shift);
1526 }
1527 
1528 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1529 LogicVRegister Simulator::shll2(VectorFormat vform,
1530                                 LogicVRegister dst,
1531                                 const LogicVRegister& src) {
1532   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1533   return sshll2(vform, dst, src, shift);
1534 }
1535 
1536 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1537 LogicVRegister Simulator::ushll(VectorFormat vform,
1538                                 LogicVRegister dst,
1539                                 const LogicVRegister& src,
1540                                 int shift) {
1541   VIXL_ASSERT(shift >= 0);
1542   SimVRegister temp1, temp2;
1543   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1544   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1545   return ushl(vform, dst, extendedreg, shiftreg);
1546 }
1547 
1548 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1549 LogicVRegister Simulator::ushll2(VectorFormat vform,
1550                                  LogicVRegister dst,
1551                                  const LogicVRegister& src,
1552                                  int shift) {
1553   VIXL_ASSERT(shift >= 0);
1554   SimVRegister temp1, temp2;
1555   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1556   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1557   return ushl(vform, dst, extendedreg, shiftreg);
1558 }
1559 
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1560 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1561                                            const LogicPRegister& pg,
1562                                            const LogicVRegister& src,
1563                                            int offset_from_last_active) {
1564   // Untested for any other values.
1565   VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1566 
1567   int last_active = GetLastActive(vform, pg);
1568   int lane_count = LaneCountFromFormat(vform);
1569   int index =
1570       ((last_active + offset_from_last_active) + lane_count) % lane_count;
1571   return std::make_pair(last_active >= 0, src.Uint(vform, index));
1572 }
1573 
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1574 LogicVRegister Simulator::compact(VectorFormat vform,
1575                                   LogicVRegister dst,
1576                                   const LogicPRegister& pg,
1577                                   const LogicVRegister& src) {
1578   int j = 0;
1579   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1580     if (pg.IsActive(vform, i)) {
1581       dst.SetUint(vform, j++, src.Uint(vform, i));
1582     }
1583   }
1584   for (; j < LaneCountFromFormat(vform); j++) {
1585     dst.SetUint(vform, j, 0);
1586   }
1587   return dst;
1588 }
1589 
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1590 LogicVRegister Simulator::splice(VectorFormat vform,
1591                                  LogicVRegister dst,
1592                                  const LogicPRegister& pg,
1593                                  const LogicVRegister& src1,
1594                                  const LogicVRegister& src2) {
1595   int lane_count = LaneCountFromFormat(vform);
1596   int first_active = GetFirstActive(vform, pg);
1597   int last_active = GetLastActive(vform, pg);
1598   int dst_idx = 0;
1599   uint64_t result[kZRegMaxSizeInBytes];
1600 
1601   if (first_active >= 0) {
1602     VIXL_ASSERT(last_active >= first_active);
1603     VIXL_ASSERT(last_active < lane_count);
1604     for (int i = first_active; i <= last_active; i++) {
1605       result[dst_idx++] = src1.Uint(vform, i);
1606     }
1607   }
1608 
1609   VIXL_ASSERT(dst_idx <= lane_count);
1610   for (int i = dst_idx; i < lane_count; i++) {
1611     result[i] = src2.Uint(vform, i - dst_idx);
1612   }
1613 
1614   dst.SetUintArray(vform, result);
1615 
1616   return dst;
1617 }
1618 
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1619 LogicVRegister Simulator::sel(VectorFormat vform,
1620                               LogicVRegister dst,
1621                               const SimPRegister& pg,
1622                               const LogicVRegister& src1,
1623                               const LogicVRegister& src2) {
1624   int p_reg_bits_per_lane =
1625       LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1626   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1627     uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1628                               ? src1.Uint(vform, lane)
1629                               : src2.Uint(vform, lane);
1630     dst.SetUint(vform, lane, lane_value);
1631   }
1632   return dst;
1633 }
1634 
1635 
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1636 LogicPRegister Simulator::sel(LogicPRegister dst,
1637                               const LogicPRegister& pg,
1638                               const LogicPRegister& src1,
1639                               const LogicPRegister& src2) {
1640   for (int i = 0; i < dst.GetChunkCount(); i++) {
1641     LogicPRegister::ChunkType mask = pg.GetChunk(i);
1642     LogicPRegister::ChunkType result =
1643         (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1644     dst.SetChunk(i, result);
1645   }
1646   return dst;
1647 }
1648 
1649 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1650 LogicVRegister Simulator::sli(VectorFormat vform,
1651                               LogicVRegister dst,
1652                               const LogicVRegister& src,
1653                               int shift) {
1654   dst.ClearForWrite(vform);
1655   int lane_count = LaneCountFromFormat(vform);
1656   for (int i = 0; i < lane_count; i++) {
1657     uint64_t src_lane = src.Uint(vform, i);
1658     uint64_t dst_lane = dst.Uint(vform, i);
1659     uint64_t shifted = src_lane << shift;
1660     uint64_t mask = MaxUintFromFormat(vform) << shift;
1661     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1662   }
1663   return dst;
1664 }
1665 
1666 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667 LogicVRegister Simulator::sqshl(VectorFormat vform,
1668                                 LogicVRegister dst,
1669                                 const LogicVRegister& src,
1670                                 int shift) {
1671   VIXL_ASSERT(shift >= 0);
1672   SimVRegister temp;
1673   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1674   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1675 }
1676 
1677 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1678 LogicVRegister Simulator::uqshl(VectorFormat vform,
1679                                 LogicVRegister dst,
1680                                 const LogicVRegister& src,
1681                                 int shift) {
1682   VIXL_ASSERT(shift >= 0);
1683   SimVRegister temp;
1684   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1685   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1686 }
1687 
1688 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1690                                  LogicVRegister dst,
1691                                  const LogicVRegister& src,
1692                                  int shift) {
1693   VIXL_ASSERT(shift >= 0);
1694   SimVRegister temp;
1695   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1696   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1697 }
1698 
1699 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1700 LogicVRegister Simulator::sri(VectorFormat vform,
1701                               LogicVRegister dst,
1702                               const LogicVRegister& src,
1703                               int shift) {
1704   dst.ClearForWrite(vform);
1705   int lane_count = LaneCountFromFormat(vform);
1706   VIXL_ASSERT((shift > 0) &&
1707               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1708   for (int i = 0; i < lane_count; i++) {
1709     uint64_t src_lane = src.Uint(vform, i);
1710     uint64_t dst_lane = dst.Uint(vform, i);
1711     uint64_t shifted;
1712     uint64_t mask;
1713     if (shift == 64) {
1714       shifted = 0;
1715       mask = 0;
1716     } else {
1717       shifted = src_lane >> shift;
1718       mask = MaxUintFromFormat(vform) >> shift;
1719     }
1720     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1721   }
1722   return dst;
1723 }
1724 
1725 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726 LogicVRegister Simulator::ushr(VectorFormat vform,
1727                                LogicVRegister dst,
1728                                const LogicVRegister& src,
1729                                int shift) {
1730   VIXL_ASSERT(shift >= 0);
1731   SimVRegister temp;
1732   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1733   return ushl(vform, dst, src, shiftreg);
1734 }
1735 
1736 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::sshr(VectorFormat vform,
1738                                LogicVRegister dst,
1739                                const LogicVRegister& src,
1740                                int shift) {
1741   VIXL_ASSERT(shift >= 0);
1742   SimVRegister temp;
1743   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1744   return sshl(vform, dst, src, shiftreg);
1745 }
1746 
1747 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1748 LogicVRegister Simulator::ssra(VectorFormat vform,
1749                                LogicVRegister dst,
1750                                const LogicVRegister& src,
1751                                int shift) {
1752   SimVRegister temp;
1753   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1754   return add(vform, dst, dst, shifted_reg);
1755 }
1756 
1757 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1758 LogicVRegister Simulator::usra(VectorFormat vform,
1759                                LogicVRegister dst,
1760                                const LogicVRegister& src,
1761                                int shift) {
1762   SimVRegister temp;
1763   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1764   return add(vform, dst, dst, shifted_reg);
1765 }
1766 
1767 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768 LogicVRegister Simulator::srsra(VectorFormat vform,
1769                                 LogicVRegister dst,
1770                                 const LogicVRegister& src,
1771                                 int shift) {
1772   SimVRegister temp;
1773   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1774   return add(vform, dst, dst, shifted_reg);
1775 }
1776 
1777 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ursra(VectorFormat vform,
1779                                 LogicVRegister dst,
1780                                 const LogicVRegister& src,
1781                                 int shift) {
1782   SimVRegister temp;
1783   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1784   return add(vform, dst, dst, shifted_reg);
1785 }
1786 
1787 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1788 LogicVRegister Simulator::cls(VectorFormat vform,
1789                               LogicVRegister dst,
1790                               const LogicVRegister& src) {
1791   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1792   int lane_count = LaneCountFromFormat(vform);
1793 
1794   // Ensure that we can store one result per lane.
1795   int result[kZRegMaxSizeInBytes];
1796 
1797   for (int i = 0; i < lane_count; i++) {
1798     result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1799   }
1800 
1801   dst.ClearForWrite(vform);
1802   for (int i = 0; i < lane_count; ++i) {
1803     dst.SetUint(vform, i, result[i]);
1804   }
1805   return dst;
1806 }
1807 
1808 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::clz(VectorFormat vform,
1810                               LogicVRegister dst,
1811                               const LogicVRegister& src) {
1812   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1813   int lane_count = LaneCountFromFormat(vform);
1814 
1815   // Ensure that we can store one result per lane.
1816   int result[kZRegMaxSizeInBytes];
1817 
1818   for (int i = 0; i < lane_count; i++) {
1819     result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1820   }
1821 
1822   dst.ClearForWrite(vform);
1823   for (int i = 0; i < lane_count; ++i) {
1824     dst.SetUint(vform, i, result[i]);
1825   }
1826   return dst;
1827 }
1828 
1829 
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1830 LogicVRegister Simulator::cnot(VectorFormat vform,
1831                                LogicVRegister dst,
1832                                const LogicVRegister& src) {
1833   dst.ClearForWrite(vform);
1834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835     uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1836     dst.SetUint(vform, i, value);
1837   }
1838   return dst;
1839 }
1840 
1841 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1842 LogicVRegister Simulator::cnt(VectorFormat vform,
1843                               LogicVRegister dst,
1844                               const LogicVRegister& src) {
1845   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1846   int lane_count = LaneCountFromFormat(vform);
1847 
1848   // Ensure that we can store one result per lane.
1849   int result[kZRegMaxSizeInBytes];
1850 
1851   for (int i = 0; i < lane_count; i++) {
1852     result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1853   }
1854 
1855   dst.ClearForWrite(vform);
1856   for (int i = 0; i < lane_count; ++i) {
1857     dst.SetUint(vform, i, result[i]);
1858   }
1859   return dst;
1860 }
1861 
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1862 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1863                                             int esize,
1864                                             bool shift_in_ls_byte) {
1865   if (shift_in_ls_byte) {
1866     // Neon uses the least-significant byte of the lane as the shift distance.
1867     shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1868   } else {
1869     // SVE uses a saturated shift distance in the range
1870     //  -(esize + 1) ... (esize + 1).
1871     if (shift_val > (esize + 1)) shift_val = esize + 1;
1872     if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1873   }
1874   return shift_val;
1875 }
1876 
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1877 LogicVRegister Simulator::sshl(VectorFormat vform,
1878                                LogicVRegister dst,
1879                                const LogicVRegister& src1,
1880                                const LogicVRegister& src2,
1881                                bool shift_in_ls_byte) {
1882   dst.ClearForWrite(vform);
1883   int esize = LaneSizeInBitsFromFormat(vform);
1884   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1885     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1886                                                      esize,
1887                                                      shift_in_ls_byte);
1888 
1889     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1890 
1891     // Set signed saturation state.
1892     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1893       dst.SetSignedSat(i, lj_src_val >= 0);
1894     }
1895 
1896     // Set unsigned saturation state.
1897     if (lj_src_val < 0) {
1898       dst.SetUnsignedSat(i, false);
1899     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1900                (lj_src_val != 0)) {
1901       dst.SetUnsignedSat(i, true);
1902     }
1903 
1904     int64_t src_val = src1.Int(vform, i);
1905     bool src_is_negative = src_val < 0;
1906     if (shift_val > 63) {
1907       dst.SetInt(vform, i, 0);
1908     } else if (shift_val < -63) {
1909       dst.SetRounding(i, src_is_negative);
1910       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1911     } else {
1912       // Use unsigned types for shifts, as behaviour is undefined for signed
1913       // lhs.
1914       uint64_t usrc_val = static_cast<uint64_t>(src_val);
1915 
1916       if (shift_val < 0) {
1917         // Convert to right shift.
1918         shift_val = -shift_val;
1919 
1920         // Set rounding state by testing most-significant bit shifted out.
1921         // Rounding only needed on right shifts.
1922         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1923           dst.SetRounding(i, true);
1924         }
1925 
1926         usrc_val >>= shift_val;
1927 
1928         if (src_is_negative) {
1929           // Simulate sign-extension.
1930           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1931         }
1932       } else {
1933         usrc_val <<= shift_val;
1934       }
1935       dst.SetUint(vform, i, usrc_val);
1936     }
1937   }
1938   return dst;
1939 }
1940 
1941 
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1942 LogicVRegister Simulator::ushl(VectorFormat vform,
1943                                LogicVRegister dst,
1944                                const LogicVRegister& src1,
1945                                const LogicVRegister& src2,
1946                                bool shift_in_ls_byte) {
1947   dst.ClearForWrite(vform);
1948   int esize = LaneSizeInBitsFromFormat(vform);
1949   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1950     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1951                                                      esize,
1952                                                      shift_in_ls_byte);
1953 
1954     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955 
1956     // Set saturation state.
1957     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958       dst.SetUnsignedSat(i, true);
1959     }
1960 
1961     uint64_t src_val = src1.Uint(vform, i);
1962     if ((shift_val > 63) || (shift_val < -64)) {
1963       dst.SetUint(vform, i, 0);
1964     } else {
1965       if (shift_val < 0) {
1966         // Set rounding state. Rounding only needed on right shifts.
1967         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968           dst.SetRounding(i, true);
1969         }
1970 
1971         if (shift_val == -64) {
1972           src_val = 0;
1973         } else {
1974           src_val >>= -shift_val;
1975         }
1976       } else {
1977         src_val <<= shift_val;
1978       }
1979       dst.SetUint(vform, i, src_val);
1980     }
1981   }
1982   return dst;
1983 }
1984 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1985 LogicVRegister Simulator::sshr(VectorFormat vform,
1986                                LogicVRegister dst,
1987                                const LogicVRegister& src1,
1988                                const LogicVRegister& src2) {
1989   SimVRegister temp;
1990   // Saturate to sidestep the min-int problem.
1991   neg(vform, temp, src2).SignedSaturate(vform);
1992   sshl(vform, dst, src1, temp, false);
1993   return dst;
1994 }
1995 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1996 LogicVRegister Simulator::ushr(VectorFormat vform,
1997                                LogicVRegister dst,
1998                                const LogicVRegister& src1,
1999                                const LogicVRegister& src2) {
2000   SimVRegister temp;
2001   // Saturate to sidestep the min-int problem.
2002   neg(vform, temp, src2).SignedSaturate(vform);
2003   ushl(vform, dst, src1, temp, false);
2004   return dst;
2005 }
2006 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2007 LogicVRegister Simulator::neg(VectorFormat vform,
2008                               LogicVRegister dst,
2009                               const LogicVRegister& src) {
2010   dst.ClearForWrite(vform);
2011   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2012     // Test for signed saturation.
2013     int64_t sa = src.Int(vform, i);
2014     if (sa == MinIntFromFormat(vform)) {
2015       dst.SetSignedSat(i, true);
2016     }
2017     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2018   }
2019   return dst;
2020 }
2021 
2022 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2023 LogicVRegister Simulator::suqadd(VectorFormat vform,
2024                                  LogicVRegister dst,
2025                                  const LogicVRegister& src1,
2026                                  const LogicVRegister& src2) {
2027   dst.ClearForWrite(vform);
2028   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029     int64_t sa = src1.IntLeftJustified(vform, i);
2030     uint64_t ub = src2.UintLeftJustified(vform, i);
2031     uint64_t ur = sa + ub;
2032 
2033     int64_t sr;
2034     memcpy(&sr, &ur, sizeof(sr));
2035     if (sr < sa) {  // Test for signed positive saturation.
2036       dst.SetInt(vform, i, MaxIntFromFormat(vform));
2037     } else {
2038       dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2039     }
2040   }
2041   return dst;
2042 }
2043 
2044 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2045 LogicVRegister Simulator::usqadd(VectorFormat vform,
2046                                  LogicVRegister dst,
2047                                  const LogicVRegister& src1,
2048                                  const LogicVRegister& src2) {
2049   dst.ClearForWrite(vform);
2050   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051     uint64_t ua = src1.UintLeftJustified(vform, i);
2052     int64_t sb = src2.IntLeftJustified(vform, i);
2053     uint64_t ur = ua + sb;
2054 
2055     if ((sb > 0) && (ur <= ua)) {
2056       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2057     } else if ((sb < 0) && (ur >= ua)) {
2058       dst.SetUint(vform, i, 0);  // Negative saturation.
2059     } else {
2060       dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2061     }
2062   }
2063   return dst;
2064 }
2065 
2066 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067 LogicVRegister Simulator::abs(VectorFormat vform,
2068                               LogicVRegister dst,
2069                               const LogicVRegister& src) {
2070   dst.ClearForWrite(vform);
2071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072     // Test for signed saturation.
2073     int64_t sa = src.Int(vform, i);
2074     if (sa == MinIntFromFormat(vform)) {
2075       dst.SetSignedSat(i, true);
2076     }
2077     if (sa < 0) {
2078       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079     } else {
2080       dst.SetInt(vform, i, sa);
2081     }
2082   }
2083   return dst;
2084 }
2085 
2086 
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2087 LogicVRegister Simulator::andv(VectorFormat vform,
2088                                LogicVRegister dst,
2089                                const LogicPRegister& pg,
2090                                const LogicVRegister& src) {
2091   VIXL_ASSERT(IsSVEFormat(vform));
2092   uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2093   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094     if (!pg.IsActive(vform, i)) continue;
2095 
2096     result &= src.Uint(vform, i);
2097   }
2098   VectorFormat vform_dst =
2099       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2100   dst.ClearForWrite(vform_dst);
2101   dst.SetUint(vform_dst, 0, result);
2102   return dst;
2103 }
2104 
2105 
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2106 LogicVRegister Simulator::eorv(VectorFormat vform,
2107                                LogicVRegister dst,
2108                                const LogicPRegister& pg,
2109                                const LogicVRegister& src) {
2110   VIXL_ASSERT(IsSVEFormat(vform));
2111   uint64_t result = 0;
2112   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2113     if (!pg.IsActive(vform, i)) continue;
2114 
2115     result ^= src.Uint(vform, i);
2116   }
2117   VectorFormat vform_dst =
2118       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2119   dst.ClearForWrite(vform_dst);
2120   dst.SetUint(vform_dst, 0, result);
2121   return dst;
2122 }
2123 
2124 
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2125 LogicVRegister Simulator::orv(VectorFormat vform,
2126                               LogicVRegister dst,
2127                               const LogicPRegister& pg,
2128                               const LogicVRegister& src) {
2129   VIXL_ASSERT(IsSVEFormat(vform));
2130   uint64_t result = 0;
2131   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2132     if (!pg.IsActive(vform, i)) continue;
2133 
2134     result |= src.Uint(vform, i);
2135   }
2136   VectorFormat vform_dst =
2137       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2138   dst.ClearForWrite(vform_dst);
2139   dst.SetUint(vform_dst, 0, result);
2140   return dst;
2141 }
2142 
2143 
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2144 LogicVRegister Simulator::saddv(VectorFormat vform,
2145                                 LogicVRegister dst,
2146                                 const LogicPRegister& pg,
2147                                 const LogicVRegister& src) {
2148   VIXL_ASSERT(IsSVEFormat(vform));
2149   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2150   int64_t result = 0;
2151   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2152     if (!pg.IsActive(vform, i)) continue;
2153 
2154     // The destination register always has D-lane sizes and the source register
2155     // always has S-lanes or smaller, so signed integer overflow -- undefined
2156     // behaviour -- can't occur.
2157     result += src.Int(vform, i);
2158   }
2159 
2160   dst.ClearForWrite(kFormatD);
2161   dst.SetInt(kFormatD, 0, result);
2162   return dst;
2163 }
2164 
2165 
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2166 LogicVRegister Simulator::uaddv(VectorFormat vform,
2167                                 LogicVRegister dst,
2168                                 const LogicPRegister& pg,
2169                                 const LogicVRegister& src) {
2170   VIXL_ASSERT(IsSVEFormat(vform));
2171   uint64_t result = 0;
2172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173     if (!pg.IsActive(vform, i)) continue;
2174 
2175     result += src.Uint(vform, i);
2176   }
2177 
2178   dst.ClearForWrite(kFormatD);
2179   dst.SetUint(kFormatD, 0, result);
2180   return dst;
2181 }
2182 
2183 
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2184 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2185                                         LogicVRegister dst,
2186                                         bool dst_is_signed,
2187                                         const LogicVRegister& src,
2188                                         bool src_is_signed) {
2189   bool upperhalf = false;
2190   VectorFormat srcform = dstform;
2191   if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2192       (dstform == kFormat4S)) {
2193     upperhalf = true;
2194     srcform = VectorFormatHalfLanes(srcform);
2195   }
2196   srcform = VectorFormatDoubleWidth(srcform);
2197 
2198   LogicVRegister src_copy = src;
2199 
2200   int offset;
2201   if (upperhalf) {
2202     offset = LaneCountFromFormat(dstform) / 2;
2203   } else {
2204     offset = 0;
2205     dst.ClearForWrite(dstform);
2206   }
2207 
2208   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2209     int64_t ssrc = src_copy.Int(srcform, i);
2210     uint64_t usrc = src_copy.Uint(srcform, i);
2211 
2212     // Test for signed saturation
2213     if (ssrc > MaxIntFromFormat(dstform)) {
2214       dst.SetSignedSat(offset + i, true);
2215     } else if (ssrc < MinIntFromFormat(dstform)) {
2216       dst.SetSignedSat(offset + i, false);
2217     }
2218 
2219     // Test for unsigned saturation
2220     if (src_is_signed) {
2221       if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2222         dst.SetUnsignedSat(offset + i, true);
2223       } else if (ssrc < 0) {
2224         dst.SetUnsignedSat(offset + i, false);
2225       }
2226     } else {
2227       if (usrc > MaxUintFromFormat(dstform)) {
2228         dst.SetUnsignedSat(offset + i, true);
2229       }
2230     }
2231 
2232     int64_t result;
2233     if (src_is_signed) {
2234       result = ssrc & MaxUintFromFormat(dstform);
2235     } else {
2236       result = usrc & MaxUintFromFormat(dstform);
2237     }
2238 
2239     if (dst_is_signed) {
2240       dst.SetInt(dstform, offset + i, result);
2241     } else {
2242       dst.SetUint(dstform, offset + i, result);
2243     }
2244   }
2245   return dst;
2246 }
2247 
2248 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249 LogicVRegister Simulator::xtn(VectorFormat vform,
2250                               LogicVRegister dst,
2251                               const LogicVRegister& src) {
2252   return extractnarrow(vform, dst, true, src, true);
2253 }
2254 
2255 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2257                                 LogicVRegister dst,
2258                                 const LogicVRegister& src) {
2259   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2260 }
2261 
2262 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2263 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2264                                  LogicVRegister dst,
2265                                  const LogicVRegister& src) {
2266   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2267 }
2268 
2269 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2270 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2271                                 LogicVRegister dst,
2272                                 const LogicVRegister& src) {
2273   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2274 }
2275 
2276 
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2277 LogicVRegister Simulator::absdiff(VectorFormat vform,
2278                                   LogicVRegister dst,
2279                                   const LogicVRegister& src1,
2280                                   const LogicVRegister& src2,
2281                                   bool is_signed) {
2282   dst.ClearForWrite(vform);
2283   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2284     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2285                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
2286     // Always calculate the answer using unsigned arithmetic, to avoid
2287     // implemenation-defined signed overflow.
2288     if (src1_gt_src2) {
2289       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2290     } else {
2291       dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2292     }
2293   }
2294   return dst;
2295 }
2296 
2297 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2298 LogicVRegister Simulator::saba(VectorFormat vform,
2299                                LogicVRegister dst,
2300                                const LogicVRegister& src1,
2301                                const LogicVRegister& src2) {
2302   SimVRegister temp;
2303   dst.ClearForWrite(vform);
2304   absdiff(vform, temp, src1, src2, true);
2305   add(vform, dst, dst, temp);
2306   return dst;
2307 }
2308 
2309 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2310 LogicVRegister Simulator::uaba(VectorFormat vform,
2311                                LogicVRegister dst,
2312                                const LogicVRegister& src1,
2313                                const LogicVRegister& src2) {
2314   SimVRegister temp;
2315   dst.ClearForWrite(vform);
2316   absdiff(vform, temp, src1, src2, false);
2317   add(vform, dst, dst, temp);
2318   return dst;
2319 }
2320 
2321 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2322 LogicVRegister Simulator::not_(VectorFormat vform,
2323                                LogicVRegister dst,
2324                                const LogicVRegister& src) {
2325   dst.ClearForWrite(vform);
2326   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327     dst.SetUint(vform, i, ~src.Uint(vform, i));
2328   }
2329   return dst;
2330 }
2331 
2332 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2333 LogicVRegister Simulator::rbit(VectorFormat vform,
2334                                LogicVRegister dst,
2335                                const LogicVRegister& src) {
2336   uint64_t result[kZRegMaxSizeInBytes];
2337   int lane_count = LaneCountFromFormat(vform);
2338   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2339   uint64_t reversed_value;
2340   uint64_t value;
2341   for (int i = 0; i < lane_count; i++) {
2342     value = src.Uint(vform, i);
2343     reversed_value = 0;
2344     for (int j = 0; j < lane_size_in_bits; j++) {
2345       reversed_value = (reversed_value << 1) | (value & 1);
2346       value >>= 1;
2347     }
2348     result[i] = reversed_value;
2349   }
2350 
2351   dst.ClearForWrite(vform);
2352   for (int i = 0; i < lane_count; ++i) {
2353     dst.SetUint(vform, i, result[i]);
2354   }
2355   return dst;
2356 }
2357 
2358 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2359 LogicVRegister Simulator::rev(VectorFormat vform,
2360                               LogicVRegister dst,
2361                               const LogicVRegister& src) {
2362   VIXL_ASSERT(IsSVEFormat(vform));
2363   int lane_count = LaneCountFromFormat(vform);
2364   for (int i = 0; i < lane_count / 2; i++) {
2365     uint64_t t = src.Uint(vform, i);
2366     dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2367     dst.SetUint(vform, lane_count - i - 1, t);
2368   }
2369   return dst;
2370 }
2371 
2372 
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2373 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2374                                    LogicVRegister dst,
2375                                    const LogicVRegister& src,
2376                                    int rev_size) {
2377   uint64_t result[kZRegMaxSizeInBytes] = {};
2378   int lane_count = LaneCountFromFormat(vform);
2379   int lane_size = LaneSizeInBytesFromFormat(vform);
2380   int lanes_per_loop = rev_size / lane_size;
2381   for (int i = 0; i < lane_count; i += lanes_per_loop) {
2382     for (int j = 0; j < lanes_per_loop; j++) {
2383       result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2384     }
2385   }
2386   dst.ClearForWrite(vform);
2387   for (int i = 0; i < lane_count; ++i) {
2388     dst.SetUint(vform, i, result[i]);
2389   }
2390   return dst;
2391 }
2392 
2393 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394 LogicVRegister Simulator::rev16(VectorFormat vform,
2395                                 LogicVRegister dst,
2396                                 const LogicVRegister& src) {
2397   return rev_byte(vform, dst, src, 2);
2398 }
2399 
2400 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2401 LogicVRegister Simulator::rev32(VectorFormat vform,
2402                                 LogicVRegister dst,
2403                                 const LogicVRegister& src) {
2404   return rev_byte(vform, dst, src, 4);
2405 }
2406 
2407 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2408 LogicVRegister Simulator::rev64(VectorFormat vform,
2409                                 LogicVRegister dst,
2410                                 const LogicVRegister& src) {
2411   return rev_byte(vform, dst, src, 8);
2412 }
2413 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2414 LogicVRegister Simulator::addlp(VectorFormat vform,
2415                                 LogicVRegister dst,
2416                                 const LogicVRegister& src,
2417                                 bool is_signed,
2418                                 bool do_accumulate) {
2419   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2420   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2421 
2422   uint64_t result[kZRegMaxSizeInBytes];
2423   int lane_count = LaneCountFromFormat(vform);
2424   for (int i = 0; i < lane_count; i++) {
2425     if (is_signed) {
2426       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2427                                         src.Int(vformsrc, 2 * i + 1));
2428     } else {
2429       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2430     }
2431   }
2432 
2433   dst.ClearForWrite(vform);
2434   for (int i = 0; i < lane_count; ++i) {
2435     if (do_accumulate) {
2436       result[i] += dst.Uint(vform, i);
2437     }
2438     dst.SetUint(vform, i, result[i]);
2439   }
2440 
2441   return dst;
2442 }
2443 
2444 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2445 LogicVRegister Simulator::saddlp(VectorFormat vform,
2446                                  LogicVRegister dst,
2447                                  const LogicVRegister& src) {
2448   return addlp(vform, dst, src, true, false);
2449 }
2450 
2451 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2452 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2453                                  LogicVRegister dst,
2454                                  const LogicVRegister& src) {
2455   return addlp(vform, dst, src, false, false);
2456 }
2457 
2458 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2459 LogicVRegister Simulator::sadalp(VectorFormat vform,
2460                                  LogicVRegister dst,
2461                                  const LogicVRegister& src) {
2462   return addlp(vform, dst, src, true, true);
2463 }
2464 
2465 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466 LogicVRegister Simulator::uadalp(VectorFormat vform,
2467                                  LogicVRegister dst,
2468                                  const LogicVRegister& src) {
2469   return addlp(vform, dst, src, false, true);
2470 }
2471 
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2472 LogicVRegister Simulator::ror(VectorFormat vform,
2473                               LogicVRegister dst,
2474                               const LogicVRegister& src,
2475                               int rotation) {
2476   int width = LaneSizeInBitsFromFormat(vform);
2477   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2478     uint64_t value = src.Uint(vform, i);
2479     dst.SetUint(vform, i, RotateRight(value, rotation, width));
2480   }
2481   return dst;
2482 }
2483 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2484 LogicVRegister Simulator::ext(VectorFormat vform,
2485                               LogicVRegister dst,
2486                               const LogicVRegister& src1,
2487                               const LogicVRegister& src2,
2488                               int index) {
2489   uint8_t result[kZRegMaxSizeInBytes] = {};
2490   int lane_count = LaneCountFromFormat(vform);
2491   for (int i = 0; i < lane_count - index; ++i) {
2492     result[i] = src1.Uint(vform, i + index);
2493   }
2494   for (int i = 0; i < index; ++i) {
2495     result[lane_count - index + i] = src2.Uint(vform, i);
2496   }
2497   dst.ClearForWrite(vform);
2498   for (int i = 0; i < lane_count; ++i) {
2499     dst.SetUint(vform, i, result[i]);
2500   }
2501   return dst;
2502 }
2503 
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2504 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2505                                                 LogicVRegister dst,
2506                                                 const LogicVRegister& src,
2507                                                 int index) {
2508   if (index < 0) index += LaneCountFromFormat(vform);
2509   VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2510   index *= LaneSizeInBytesFromFormat(vform);
2511   return ext(kFormatVnB, dst, src, src, index);
2512 }
2513 
2514 
2515 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2516 LogicVRegister Simulator::fadda(VectorFormat vform,
2517                                 LogicVRegister acc,
2518                                 const LogicPRegister& pg,
2519                                 const LogicVRegister& src) {
2520   T result = acc.Float<T>(0);
2521   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2522     if (!pg.IsActive(vform, i)) continue;
2523 
2524     result = FPAdd(result, src.Float<T>(i));
2525   }
2526   VectorFormat vform_dst =
2527       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2528   acc.ClearForWrite(vform_dst);
2529   acc.SetFloat(0, result);
2530   return acc;
2531 }
2532 
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2533 LogicVRegister Simulator::fadda(VectorFormat vform,
2534                                 LogicVRegister acc,
2535                                 const LogicPRegister& pg,
2536                                 const LogicVRegister& src) {
2537   switch (LaneSizeInBitsFromFormat(vform)) {
2538     case kHRegSize:
2539       fadda<SimFloat16>(vform, acc, pg, src);
2540       break;
2541     case kSRegSize:
2542       fadda<float>(vform, acc, pg, src);
2543       break;
2544     case kDRegSize:
2545       fadda<double>(vform, acc, pg, src);
2546       break;
2547     default:
2548       VIXL_UNREACHABLE();
2549   }
2550   return acc;
2551 }
2552 
2553 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2554 LogicVRegister Simulator::fcadd(VectorFormat vform,
2555                                 LogicVRegister dst,          // d
2556                                 const LogicVRegister& src1,  // n
2557                                 const LogicVRegister& src2,  // m
2558                                 int rot) {
2559   int elements = LaneCountFromFormat(vform);
2560 
2561   T element1, element3;
2562   rot = (rot == 1) ? 270 : 90;
2563 
2564   // Loop example:
2565   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2566   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2567 
2568   for (int e = 0; e <= (elements / 2) - 1; e++) {
2569     switch (rot) {
2570       case 90:
2571         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2572         element3 = src2.Float<T>(e * 2);
2573         break;
2574       case 270:
2575         element1 = src2.Float<T>(e * 2 + 1);
2576         element3 = FPNeg(src2.Float<T>(e * 2));
2577         break;
2578       default:
2579         VIXL_UNREACHABLE();
2580         return dst;  // prevents "element(n) may be unintialized" errors
2581     }
2582     dst.ClearForWrite(vform);
2583     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2584     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2585   }
2586   return dst;
2587 }
2588 
2589 
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2590 LogicVRegister Simulator::fcadd(VectorFormat vform,
2591                                 LogicVRegister dst,          // d
2592                                 const LogicVRegister& src1,  // n
2593                                 const LogicVRegister& src2,  // m
2594                                 int rot) {
2595   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2596     fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2597   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2598     fcadd<float>(vform, dst, src1, src2, rot);
2599   } else {
2600     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2601     fcadd<double>(vform, dst, src1, src2, rot);
2602   }
2603   return dst;
2604 }
2605 
2606 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2607 LogicVRegister Simulator::fcmla(VectorFormat vform,
2608                                 LogicVRegister dst,
2609                                 const LogicVRegister& src1,
2610                                 const LogicVRegister& src2,
2611                                 const LogicVRegister& acc,
2612                                 int index,
2613                                 int rot) {
2614   int elements = LaneCountFromFormat(vform);
2615 
2616   T element1, element2, element3, element4;
2617   rot *= 90;
2618 
2619   // Loop example:
2620   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2621   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2622 
2623   for (int e = 0; e <= (elements / 2) - 1; e++) {
2624     // Index == -1 indicates a vector/vector rather than vector/indexed-element
2625     // operation.
2626     int f = (index < 0) ? e : index;
2627 
2628     switch (rot) {
2629       case 0:
2630         element1 = src2.Float<T>(f * 2);
2631         element2 = src1.Float<T>(e * 2);
2632         element3 = src2.Float<T>(f * 2 + 1);
2633         element4 = src1.Float<T>(e * 2);
2634         break;
2635       case 90:
2636         element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2637         element2 = src1.Float<T>(e * 2 + 1);
2638         element3 = src2.Float<T>(f * 2);
2639         element4 = src1.Float<T>(e * 2 + 1);
2640         break;
2641       case 180:
2642         element1 = FPNeg(src2.Float<T>(f * 2));
2643         element2 = src1.Float<T>(e * 2);
2644         element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2645         element4 = src1.Float<T>(e * 2);
2646         break;
2647       case 270:
2648         element1 = src2.Float<T>(f * 2 + 1);
2649         element2 = src1.Float<T>(e * 2 + 1);
2650         element3 = FPNeg(src2.Float<T>(f * 2));
2651         element4 = src1.Float<T>(e * 2 + 1);
2652         break;
2653       default:
2654         VIXL_UNREACHABLE();
2655         return dst;  // prevents "element(n) may be unintialized" errors
2656     }
2657     dst.ClearForWrite(vform);
2658     dst.SetFloat<T>(vform,
2659                     e * 2,
2660                     FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2661     dst.SetFloat<T>(vform,
2662                     e * 2 + 1,
2663                     FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2664   }
2665   return dst;
2666 }
2667 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2668 LogicVRegister Simulator::fcmla(VectorFormat vform,
2669                                 LogicVRegister dst,
2670                                 const LogicVRegister& src1,
2671                                 const LogicVRegister& src2,
2672                                 const LogicVRegister& acc,
2673                                 int rot) {
2674   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2675     fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2676   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2677     fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2678   } else {
2679     fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2680   }
2681   return dst;
2682 }
2683 
2684 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2685 LogicVRegister Simulator::fcmla(VectorFormat vform,
2686                                 LogicVRegister dst,          // d
2687                                 const LogicVRegister& src1,  // n
2688                                 const LogicVRegister& src2,  // m
2689                                 int index,
2690                                 int rot) {
2691   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2692     VIXL_UNIMPLEMENTED();
2693   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2694     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2695   } else {
2696     fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2697   }
2698   return dst;
2699 }
2700 
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2701 LogicVRegister Simulator::cadd(VectorFormat vform,
2702                                LogicVRegister dst,
2703                                const LogicVRegister& src1,
2704                                const LogicVRegister& src2,
2705                                int rot,
2706                                bool saturate) {
2707   SimVRegister src1_r, src1_i;
2708   SimVRegister src2_r, src2_i;
2709   SimVRegister zero;
2710   zero.Clear();
2711   uzp1(vform, src1_r, src1, zero);
2712   uzp2(vform, src1_i, src1, zero);
2713   uzp1(vform, src2_r, src2, zero);
2714   uzp2(vform, src2_i, src2, zero);
2715 
2716   if (rot == 90) {
2717     if (saturate) {
2718       sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2719       add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2720     } else {
2721       sub(vform, src1_r, src1_r, src2_i);
2722       add(vform, src1_i, src1_i, src2_r);
2723     }
2724   } else {
2725     VIXL_ASSERT(rot == 270);
2726     if (saturate) {
2727       add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2728       sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2729     } else {
2730       add(vform, src1_r, src1_r, src2_i);
2731       sub(vform, src1_i, src1_i, src2_r);
2732     }
2733   }
2734 
2735   zip1(vform, dst, src1_r, src1_i);
2736   return dst;
2737 }
2738 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2739 LogicVRegister Simulator::cmla(VectorFormat vform,
2740                                LogicVRegister dst,
2741                                const LogicVRegister& srca,
2742                                const LogicVRegister& src1,
2743                                const LogicVRegister& src2,
2744                                int rot) {
2745   SimVRegister src1_a;
2746   SimVRegister src2_a, src2_b;
2747   SimVRegister srca_i, srca_r;
2748   SimVRegister zero, temp;
2749   zero.Clear();
2750 
2751   if ((rot == 0) || (rot == 180)) {
2752     uzp1(vform, src1_a, src1, zero);
2753     uzp1(vform, src2_a, src2, zero);
2754     uzp2(vform, src2_b, src2, zero);
2755   } else {
2756     uzp2(vform, src1_a, src1, zero);
2757     uzp2(vform, src2_a, src2, zero);
2758     uzp1(vform, src2_b, src2, zero);
2759   }
2760 
2761   uzp1(vform, srca_r, srca, zero);
2762   uzp2(vform, srca_i, srca, zero);
2763 
2764   bool sub_r = (rot == 90) || (rot == 180);
2765   bool sub_i = (rot == 180) || (rot == 270);
2766 
2767   mul(vform, temp, src1_a, src2_a);
2768   if (sub_r) {
2769     sub(vform, srca_r, srca_r, temp);
2770   } else {
2771     add(vform, srca_r, srca_r, temp);
2772   }
2773 
2774   mul(vform, temp, src1_a, src2_b);
2775   if (sub_i) {
2776     sub(vform, srca_i, srca_i, temp);
2777   } else {
2778     add(vform, srca_i, srca_i, temp);
2779   }
2780 
2781   zip1(vform, dst, srca_r, srca_i);
2782   return dst;
2783 }
2784 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2785 LogicVRegister Simulator::cmla(VectorFormat vform,
2786                                LogicVRegister dst,
2787                                const LogicVRegister& srca,
2788                                const LogicVRegister& src1,
2789                                const LogicVRegister& src2,
2790                                int index,
2791                                int rot) {
2792   SimVRegister temp;
2793   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2794   return cmla(vform, dst, srca, src1, temp, rot);
2795 }
2796 
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2797 LogicVRegister Simulator::bgrp(VectorFormat vform,
2798                                LogicVRegister dst,
2799                                const LogicVRegister& src1,
2800                                const LogicVRegister& src2,
2801                                bool do_bext) {
2802   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2803     uint64_t value = src1.Uint(vform, i);
2804     uint64_t mask = src2.Uint(vform, i);
2805     int high_pos = 0;
2806     int low_pos = 0;
2807     uint64_t result_high = 0;
2808     uint64_t result_low = 0;
2809     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2810       if ((mask & 1) == 0) {
2811         result_high |= (value & 1) << high_pos;
2812         high_pos++;
2813       } else {
2814         result_low |= (value & 1) << low_pos;
2815         low_pos++;
2816       }
2817       mask >>= 1;
2818       value >>= 1;
2819     }
2820 
2821     if (!do_bext) {
2822       result_low |= result_high << low_pos;
2823     }
2824 
2825     dst.SetUint(vform, i, result_low);
2826   }
2827   return dst;
2828 }
2829 
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830 LogicVRegister Simulator::bdep(VectorFormat vform,
2831                                LogicVRegister dst,
2832                                const LogicVRegister& src1,
2833                                const LogicVRegister& src2) {
2834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2835     uint64_t value = src1.Uint(vform, i);
2836     uint64_t mask = src2.Uint(vform, i);
2837     uint64_t result = 0;
2838     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2839       if ((mask & 1) == 1) {
2840         result |= (value & 1) << j;
2841         value >>= 1;
2842       }
2843       mask >>= 1;
2844     }
2845     dst.SetUint(vform, i, result);
2846   }
2847   return dst;
2848 }
2849 
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2850 LogicVRegister Simulator::histogram(VectorFormat vform,
2851                                     LogicVRegister dst,
2852                                     const LogicPRegister& pg,
2853                                     const LogicVRegister& src1,
2854                                     const LogicVRegister& src2,
2855                                     bool do_segmented) {
2856   int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2857   uint64_t result[kZRegMaxSizeInBytes];
2858 
2859   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2860     uint64_t count = 0;
2861     uint64_t value = src1.Uint(vform, i);
2862 
2863     int segment = do_segmented ? (i / elements_per_segment) : 0;
2864     int segment_offset = segment * elements_per_segment;
2865     int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2866     for (int j = 0; j < hist_limit; j++) {
2867       if (pg.IsActive(vform, j) &&
2868           (value == src2.Uint(vform, j + segment_offset))) {
2869         count++;
2870       }
2871     }
2872     result[i] = count;
2873   }
2874   dst.SetUintArray(vform, result);
2875   return dst;
2876 }
2877 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2878 LogicVRegister Simulator::dup_element(VectorFormat vform,
2879                                       LogicVRegister dst,
2880                                       const LogicVRegister& src,
2881                                       int src_index) {
2882   if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2883     // When duplicating an element larger than 64 bits, split the element into
2884     // 64-bit parts, and duplicate the parts across the destination.
2885     uint64_t d[4];
2886     int count = (vform == kFormatVnQ) ? 2 : 4;
2887     for (int i = 0; i < count; i++) {
2888       d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2889     }
2890     dst.Clear();
2891     for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2892       dst.SetUint(kFormatVnD, i, d[i % count]);
2893     }
2894   } else {
2895     int lane_count = LaneCountFromFormat(vform);
2896     uint64_t value = src.Uint(vform, src_index);
2897     dst.ClearForWrite(vform);
2898     for (int i = 0; i < lane_count; ++i) {
2899       dst.SetUint(vform, i, value);
2900     }
2901   }
2902   return dst;
2903 }
2904 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2905 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2906                                                    LogicVRegister dst,
2907                                                    const LogicVRegister& src,
2908                                                    int src_index) {
2909   // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2910   // whereas in NEON, the size of segment is equal to the size of register
2911   // itself.
2912   int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2913   VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2914   int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2915 
2916   VIXL_ASSERT(src_index >= 0);
2917   VIXL_ASSERT(src_index < lanes_per_segment);
2918 
2919   dst.ClearForWrite(vform);
2920   for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2921     uint64_t value = src.Uint(vform, j + src_index);
2922     for (int i = 0; i < lanes_per_segment; i++) {
2923       dst.SetUint(vform, j + i, value);
2924     }
2925   }
2926   return dst;
2927 }
2928 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2929 LogicVRegister Simulator::dup_elements_to_segments(
2930     VectorFormat vform,
2931     LogicVRegister dst,
2932     const std::pair<int, int>& src_and_index) {
2933   return dup_elements_to_segments(vform,
2934                                   dst,
2935                                   ReadVRegister(src_and_index.first),
2936                                   src_and_index.second);
2937 }
2938 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2939 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2940                                         LogicVRegister dst,
2941                                         uint64_t imm) {
2942   int lane_count = LaneCountFromFormat(vform);
2943   uint64_t value = imm & MaxUintFromFormat(vform);
2944   dst.ClearForWrite(vform);
2945   for (int i = 0; i < lane_count; ++i) {
2946     dst.SetUint(vform, i, value);
2947   }
2948   return dst;
2949 }
2950 
2951 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2952 LogicVRegister Simulator::ins_element(VectorFormat vform,
2953                                       LogicVRegister dst,
2954                                       int dst_index,
2955                                       const LogicVRegister& src,
2956                                       int src_index) {
2957   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2958   return dst;
2959 }
2960 
2961 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2962 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2963                                         LogicVRegister dst,
2964                                         int dst_index,
2965                                         uint64_t imm) {
2966   uint64_t value = imm & MaxUintFromFormat(vform);
2967   dst.SetUint(vform, dst_index, value);
2968   return dst;
2969 }
2970 
2971 
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2972 LogicVRegister Simulator::index(VectorFormat vform,
2973                                 LogicVRegister dst,
2974                                 uint64_t start,
2975                                 uint64_t step) {
2976   VIXL_ASSERT(IsSVEFormat(vform));
2977   uint64_t value = start;
2978   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2979     dst.SetUint(vform, i, value);
2980     value += step;
2981   }
2982   return dst;
2983 }
2984 
2985 
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2986 LogicVRegister Simulator::insr(VectorFormat vform,
2987                                LogicVRegister dst,
2988                                uint64_t imm) {
2989   VIXL_ASSERT(IsSVEFormat(vform));
2990   for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2991     dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2992   }
2993   dst.SetUint(vform, 0, imm);
2994   return dst;
2995 }
2996 
2997 
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2998 LogicVRegister Simulator::mov(VectorFormat vform,
2999                               LogicVRegister dst,
3000                               const LogicVRegister& src) {
3001   dst.ClearForWrite(vform);
3002   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3003     dst.SetUint(vform, lane, src.Uint(vform, lane));
3004   }
3005   return dst;
3006 }
3007 
3008 
mov(LogicPRegister dst,const LogicPRegister & src)3009 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3010   // Avoid a copy if the registers already alias.
3011   if (dst.Aliases(src)) return dst;
3012 
3013   for (int i = 0; i < dst.GetChunkCount(); i++) {
3014     dst.SetChunk(i, src.GetChunk(i));
3015   }
3016   return dst;
3017 }
3018 
3019 
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3020 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3021                                       LogicVRegister dst,
3022                                       const SimPRegister& pg,
3023                                       const LogicVRegister& src) {
3024   return sel(vform, dst, pg, src, dst);
3025 }
3026 
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3027 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3028                                       LogicVRegister dst,
3029                                       const SimPRegister& pg,
3030                                       const LogicVRegister& src) {
3031   SimVRegister zero;
3032   dup_immediate(vform, zero, 0);
3033   return sel(vform, dst, pg, src, zero);
3034 }
3035 
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3036 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3037                                           LogicVRegister dst,
3038                                           const LogicVRegister& src,
3039                                           int start_at) {
3040   VIXL_ASSERT((start_at == 0) || (start_at == 1));
3041   for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3042     dst.SetUint(vform, i, src.Uint(vform, i));
3043   }
3044   return dst;
3045 }
3046 
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3047 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3048                                       const LogicPRegister& pg,
3049                                       const LogicPRegister& src) {
3050   return sel(dst, pg, src, dst);
3051 }
3052 
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3053 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3054                                       const LogicPRegister& pg,
3055                                       const LogicPRegister& src) {
3056   SimPRegister all_false;
3057   return sel(dst, pg, src, pfalse(all_false));
3058 }
3059 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3060 LogicVRegister Simulator::movi(VectorFormat vform,
3061                                LogicVRegister dst,
3062                                uint64_t imm) {
3063   int lane_count = LaneCountFromFormat(vform);
3064   dst.ClearForWrite(vform);
3065   for (int i = 0; i < lane_count; ++i) {
3066     dst.SetUint(vform, i, imm);
3067   }
3068   return dst;
3069 }
3070 
3071 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3072 LogicVRegister Simulator::mvni(VectorFormat vform,
3073                                LogicVRegister dst,
3074                                uint64_t imm) {
3075   int lane_count = LaneCountFromFormat(vform);
3076   dst.ClearForWrite(vform);
3077   for (int i = 0; i < lane_count; ++i) {
3078     dst.SetUint(vform, i, ~imm);
3079   }
3080   return dst;
3081 }
3082 
3083 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3084 LogicVRegister Simulator::orr(VectorFormat vform,
3085                               LogicVRegister dst,
3086                               const LogicVRegister& src,
3087                               uint64_t imm) {
3088   uint64_t result[16];
3089   int lane_count = LaneCountFromFormat(vform);
3090   for (int i = 0; i < lane_count; ++i) {
3091     result[i] = src.Uint(vform, i) | imm;
3092   }
3093   dst.ClearForWrite(vform);
3094   for (int i = 0; i < lane_count; ++i) {
3095     dst.SetUint(vform, i, result[i]);
3096   }
3097   return dst;
3098 }
3099 
3100 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3101 LogicVRegister Simulator::uxtl(VectorFormat vform,
3102                                LogicVRegister dst,
3103                                const LogicVRegister& src,
3104                                bool is_2) {
3105   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3106   int lane_count = LaneCountFromFormat(vform);
3107   int src_offset = is_2 ? lane_count : 0;
3108 
3109   dst.ClearForWrite(vform);
3110   for (int i = 0; i < lane_count; i++) {
3111     dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3112   }
3113   return dst;
3114 }
3115 
3116 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3117 LogicVRegister Simulator::sxtl(VectorFormat vform,
3118                                LogicVRegister dst,
3119                                const LogicVRegister& src,
3120                                bool is_2) {
3121   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122   int lane_count = LaneCountFromFormat(vform);
3123   int src_offset = is_2 ? lane_count : 0;
3124 
3125   dst.ClearForWrite(vform);
3126   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3127     dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3128   }
3129   return dst;
3130 }
3131 
3132 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3133 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3134                                 LogicVRegister dst,
3135                                 const LogicVRegister& src) {
3136   return uxtl(vform, dst, src, /* is_2 = */ true);
3137 }
3138 
3139 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3140 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3141                                 LogicVRegister dst,
3142                                 const LogicVRegister& src) {
3143   return sxtl(vform, dst, src, /* is_2 = */ true);
3144 }
3145 
3146 
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::uxt(VectorFormat vform,
3148                               LogicVRegister dst,
3149                               const LogicVRegister& src,
3150                               unsigned from_size_in_bits) {
3151   int lane_count = LaneCountFromFormat(vform);
3152   uint64_t mask = GetUintMask(from_size_in_bits);
3153 
3154   dst.ClearForWrite(vform);
3155   for (int i = 0; i < lane_count; i++) {
3156     dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3157   }
3158   return dst;
3159 }
3160 
3161 
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3162 LogicVRegister Simulator::sxt(VectorFormat vform,
3163                               LogicVRegister dst,
3164                               const LogicVRegister& src,
3165                               unsigned from_size_in_bits) {
3166   int lane_count = LaneCountFromFormat(vform);
3167 
3168   dst.ClearForWrite(vform);
3169   for (int i = 0; i < lane_count; i++) {
3170     uint64_t value =
3171         ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3172     dst.SetInt(vform, i, value);
3173   }
3174   return dst;
3175 }
3176 
3177 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3178 LogicVRegister Simulator::shrn(VectorFormat vform,
3179                                LogicVRegister dst,
3180                                const LogicVRegister& src,
3181                                int shift) {
3182   SimVRegister temp;
3183   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3184   VectorFormat vform_dst = vform;
3185   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3186   return extractnarrow(vform_dst, dst, false, shifted_src, false);
3187 }
3188 
3189 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3190 LogicVRegister Simulator::shrn2(VectorFormat vform,
3191                                 LogicVRegister dst,
3192                                 const LogicVRegister& src,
3193                                 int shift) {
3194   SimVRegister temp;
3195   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3196   VectorFormat vformdst = vform;
3197   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3198   return extractnarrow(vformdst, dst, false, shifted_src, false);
3199 }
3200 
3201 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3202 LogicVRegister Simulator::rshrn(VectorFormat vform,
3203                                 LogicVRegister dst,
3204                                 const LogicVRegister& src,
3205                                 int shift) {
3206   SimVRegister temp;
3207   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3208   VectorFormat vformdst = vform;
3209   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3210   return extractnarrow(vformdst, dst, false, shifted_src, false);
3211 }
3212 
3213 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3214 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3215                                  LogicVRegister dst,
3216                                  const LogicVRegister& src,
3217                                  int shift) {
3218   SimVRegister temp;
3219   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3220   VectorFormat vformdst = vform;
3221   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3222   return extractnarrow(vformdst, dst, false, shifted_src, false);
3223 }
3224 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3225 LogicVRegister Simulator::Table(VectorFormat vform,
3226                                 LogicVRegister dst,
3227                                 const LogicVRegister& ind,
3228                                 bool zero_out_of_bounds,
3229                                 const LogicVRegister* tab1,
3230                                 const LogicVRegister* tab2,
3231                                 const LogicVRegister* tab3,
3232                                 const LogicVRegister* tab4) {
3233   VIXL_ASSERT(tab1 != NULL);
3234   int lane_count = LaneCountFromFormat(vform);
3235   VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3236   uint64_t table[kZRegMaxSizeInBytes * 2];
3237   uint64_t result[kZRegMaxSizeInBytes];
3238 
3239   // For Neon, the table source registers are always 16B, and Neon allows only
3240   // 8B or 16B vform for the destination, so infer the table format from the
3241   // destination.
3242   VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3243 
3244   uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3245   if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3246   if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3247   if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3248 
3249   for (int i = 0; i < lane_count; i++) {
3250     uint64_t index = ind.Uint(vform, i);
3251     result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3252     if (index < tab_size) result[i] = table[index];
3253   }
3254   dst.SetUintArray(vform, result);
3255   return dst;
3256 }
3257 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3258 LogicVRegister Simulator::tbl(VectorFormat vform,
3259                               LogicVRegister dst,
3260                               const LogicVRegister& tab,
3261                               const LogicVRegister& ind) {
3262   return Table(vform, dst, ind, true, &tab);
3263 }
3264 
3265 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3266 LogicVRegister Simulator::tbl(VectorFormat vform,
3267                               LogicVRegister dst,
3268                               const LogicVRegister& tab,
3269                               const LogicVRegister& tab2,
3270                               const LogicVRegister& ind) {
3271   return Table(vform, dst, ind, true, &tab, &tab2);
3272 }
3273 
3274 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3275 LogicVRegister Simulator::tbl(VectorFormat vform,
3276                               LogicVRegister dst,
3277                               const LogicVRegister& tab,
3278                               const LogicVRegister& tab2,
3279                               const LogicVRegister& tab3,
3280                               const LogicVRegister& ind) {
3281   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3282 }
3283 
3284 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3285 LogicVRegister Simulator::tbl(VectorFormat vform,
3286                               LogicVRegister dst,
3287                               const LogicVRegister& tab,
3288                               const LogicVRegister& tab2,
3289                               const LogicVRegister& tab3,
3290                               const LogicVRegister& tab4,
3291                               const LogicVRegister& ind) {
3292   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3293 }
3294 
3295 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3296 LogicVRegister Simulator::tbx(VectorFormat vform,
3297                               LogicVRegister dst,
3298                               const LogicVRegister& tab,
3299                               const LogicVRegister& ind) {
3300   return Table(vform, dst, ind, false, &tab);
3301 }
3302 
3303 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3304 LogicVRegister Simulator::tbx(VectorFormat vform,
3305                               LogicVRegister dst,
3306                               const LogicVRegister& tab,
3307                               const LogicVRegister& tab2,
3308                               const LogicVRegister& ind) {
3309   return Table(vform, dst, ind, false, &tab, &tab2);
3310 }
3311 
3312 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3313 LogicVRegister Simulator::tbx(VectorFormat vform,
3314                               LogicVRegister dst,
3315                               const LogicVRegister& tab,
3316                               const LogicVRegister& tab2,
3317                               const LogicVRegister& tab3,
3318                               const LogicVRegister& ind) {
3319   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3320 }
3321 
3322 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3323 LogicVRegister Simulator::tbx(VectorFormat vform,
3324                               LogicVRegister dst,
3325                               const LogicVRegister& tab,
3326                               const LogicVRegister& tab2,
3327                               const LogicVRegister& tab3,
3328                               const LogicVRegister& tab4,
3329                               const LogicVRegister& ind) {
3330   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3331 }
3332 
3333 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3334 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3335                                  LogicVRegister dst,
3336                                  const LogicVRegister& src,
3337                                  int shift) {
3338   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3339 }
3340 
3341 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3342 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3343                                   LogicVRegister dst,
3344                                   const LogicVRegister& src,
3345                                   int shift) {
3346   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3347 }
3348 
3349 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3350 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3351                                   LogicVRegister dst,
3352                                   const LogicVRegister& src,
3353                                   int shift) {
3354   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3355 }
3356 
3357 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3358 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3359                                    LogicVRegister dst,
3360                                    const LogicVRegister& src,
3361                                    int shift) {
3362   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3363 }
3364 
3365 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3366 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3367                                  LogicVRegister dst,
3368                                  const LogicVRegister& src,
3369                                  int shift) {
3370   SimVRegister temp;
3371   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3372   VectorFormat vformdst = vform;
3373   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3374   return sqxtn(vformdst, dst, shifted_src);
3375 }
3376 
3377 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3378 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3379                                   LogicVRegister dst,
3380                                   const LogicVRegister& src,
3381                                   int shift) {
3382   SimVRegister temp;
3383   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3384   VectorFormat vformdst = vform;
3385   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3386   return sqxtn(vformdst, dst, shifted_src);
3387 }
3388 
3389 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3390 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3391                                   LogicVRegister dst,
3392                                   const LogicVRegister& src,
3393                                   int shift) {
3394   SimVRegister temp;
3395   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3396   VectorFormat vformdst = vform;
3397   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3398   return sqxtn(vformdst, dst, shifted_src);
3399 }
3400 
3401 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3402 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3403                                    LogicVRegister dst,
3404                                    const LogicVRegister& src,
3405                                    int shift) {
3406   SimVRegister temp;
3407   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3408   VectorFormat vformdst = vform;
3409   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3410   return sqxtn(vformdst, dst, shifted_src);
3411 }
3412 
3413 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3414 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3415                                   LogicVRegister dst,
3416                                   const LogicVRegister& src,
3417                                   int shift) {
3418   SimVRegister temp;
3419   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3420   VectorFormat vformdst = vform;
3421   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3422   return sqxtun(vformdst, dst, shifted_src);
3423 }
3424 
3425 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3426 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3427                                    LogicVRegister dst,
3428                                    const LogicVRegister& src,
3429                                    int shift) {
3430   SimVRegister temp;
3431   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3432   VectorFormat vformdst = vform;
3433   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3434   return sqxtun(vformdst, dst, shifted_src);
3435 }
3436 
3437 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3438 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3439                                    LogicVRegister dst,
3440                                    const LogicVRegister& src,
3441                                    int shift) {
3442   SimVRegister temp;
3443   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3444   VectorFormat vformdst = vform;
3445   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3446   return sqxtun(vformdst, dst, shifted_src);
3447 }
3448 
3449 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3450 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3451                                     LogicVRegister dst,
3452                                     const LogicVRegister& src,
3453                                     int shift) {
3454   SimVRegister temp;
3455   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3456   VectorFormat vformdst = vform;
3457   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3458   return sqxtun(vformdst, dst, shifted_src);
3459 }
3460 
3461 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3462 LogicVRegister Simulator::uaddl(VectorFormat vform,
3463                                 LogicVRegister dst,
3464                                 const LogicVRegister& src1,
3465                                 const LogicVRegister& src2) {
3466   SimVRegister temp1, temp2;
3467   uxtl(vform, temp1, src1);
3468   uxtl(vform, temp2, src2);
3469   add(vform, dst, temp1, temp2);
3470   return dst;
3471 }
3472 
3473 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3475                                  LogicVRegister dst,
3476                                  const LogicVRegister& src1,
3477                                  const LogicVRegister& src2) {
3478   SimVRegister temp1, temp2;
3479   uxtl2(vform, temp1, src1);
3480   uxtl2(vform, temp2, src2);
3481   add(vform, dst, temp1, temp2);
3482   return dst;
3483 }
3484 
3485 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486 LogicVRegister Simulator::uaddw(VectorFormat vform,
3487                                 LogicVRegister dst,
3488                                 const LogicVRegister& src1,
3489                                 const LogicVRegister& src2) {
3490   SimVRegister temp;
3491   uxtl(vform, temp, src2);
3492   add(vform, dst, src1, temp);
3493   return dst;
3494 }
3495 
3496 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3498                                  LogicVRegister dst,
3499                                  const LogicVRegister& src1,
3500                                  const LogicVRegister& src2) {
3501   SimVRegister temp;
3502   uxtl2(vform, temp, src2);
3503   add(vform, dst, src1, temp);
3504   return dst;
3505 }
3506 
3507 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508 LogicVRegister Simulator::saddl(VectorFormat vform,
3509                                 LogicVRegister dst,
3510                                 const LogicVRegister& src1,
3511                                 const LogicVRegister& src2) {
3512   SimVRegister temp1, temp2;
3513   sxtl(vform, temp1, src1);
3514   sxtl(vform, temp2, src2);
3515   add(vform, dst, temp1, temp2);
3516   return dst;
3517 }
3518 
3519 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3520 LogicVRegister Simulator::saddl2(VectorFormat vform,
3521                                  LogicVRegister dst,
3522                                  const LogicVRegister& src1,
3523                                  const LogicVRegister& src2) {
3524   SimVRegister temp1, temp2;
3525   sxtl2(vform, temp1, src1);
3526   sxtl2(vform, temp2, src2);
3527   add(vform, dst, temp1, temp2);
3528   return dst;
3529 }
3530 
3531 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3532 LogicVRegister Simulator::saddw(VectorFormat vform,
3533                                 LogicVRegister dst,
3534                                 const LogicVRegister& src1,
3535                                 const LogicVRegister& src2) {
3536   SimVRegister temp;
3537   sxtl(vform, temp, src2);
3538   add(vform, dst, src1, temp);
3539   return dst;
3540 }
3541 
3542 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3543 LogicVRegister Simulator::saddw2(VectorFormat vform,
3544                                  LogicVRegister dst,
3545                                  const LogicVRegister& src1,
3546                                  const LogicVRegister& src2) {
3547   SimVRegister temp;
3548   sxtl2(vform, temp, src2);
3549   add(vform, dst, src1, temp);
3550   return dst;
3551 }
3552 
3553 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3554 LogicVRegister Simulator::usubl(VectorFormat vform,
3555                                 LogicVRegister dst,
3556                                 const LogicVRegister& src1,
3557                                 const LogicVRegister& src2) {
3558   SimVRegister temp1, temp2;
3559   uxtl(vform, temp1, src1);
3560   uxtl(vform, temp2, src2);
3561   sub(vform, dst, temp1, temp2);
3562   return dst;
3563 }
3564 
3565 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3566 LogicVRegister Simulator::usubl2(VectorFormat vform,
3567                                  LogicVRegister dst,
3568                                  const LogicVRegister& src1,
3569                                  const LogicVRegister& src2) {
3570   SimVRegister temp1, temp2;
3571   uxtl2(vform, temp1, src1);
3572   uxtl2(vform, temp2, src2);
3573   sub(vform, dst, temp1, temp2);
3574   return dst;
3575 }
3576 
3577 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3578 LogicVRegister Simulator::usubw(VectorFormat vform,
3579                                 LogicVRegister dst,
3580                                 const LogicVRegister& src1,
3581                                 const LogicVRegister& src2) {
3582   SimVRegister temp;
3583   uxtl(vform, temp, src2);
3584   sub(vform, dst, src1, temp);
3585   return dst;
3586 }
3587 
3588 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3589 LogicVRegister Simulator::usubw2(VectorFormat vform,
3590                                  LogicVRegister dst,
3591                                  const LogicVRegister& src1,
3592                                  const LogicVRegister& src2) {
3593   SimVRegister temp;
3594   uxtl2(vform, temp, src2);
3595   sub(vform, dst, src1, temp);
3596   return dst;
3597 }
3598 
3599 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3600 LogicVRegister Simulator::ssubl(VectorFormat vform,
3601                                 LogicVRegister dst,
3602                                 const LogicVRegister& src1,
3603                                 const LogicVRegister& src2) {
3604   SimVRegister temp1, temp2;
3605   sxtl(vform, temp1, src1);
3606   sxtl(vform, temp2, src2);
3607   sub(vform, dst, temp1, temp2);
3608   return dst;
3609 }
3610 
3611 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3612 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3613                                  LogicVRegister dst,
3614                                  const LogicVRegister& src1,
3615                                  const LogicVRegister& src2) {
3616   SimVRegister temp1, temp2;
3617   sxtl2(vform, temp1, src1);
3618   sxtl2(vform, temp2, src2);
3619   sub(vform, dst, temp1, temp2);
3620   return dst;
3621 }
3622 
3623 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3624 LogicVRegister Simulator::ssubw(VectorFormat vform,
3625                                 LogicVRegister dst,
3626                                 const LogicVRegister& src1,
3627                                 const LogicVRegister& src2) {
3628   SimVRegister temp;
3629   sxtl(vform, temp, src2);
3630   sub(vform, dst, src1, temp);
3631   return dst;
3632 }
3633 
3634 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3635 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3636                                  LogicVRegister dst,
3637                                  const LogicVRegister& src1,
3638                                  const LogicVRegister& src2) {
3639   SimVRegister temp;
3640   sxtl2(vform, temp, src2);
3641   sub(vform, dst, src1, temp);
3642   return dst;
3643 }
3644 
3645 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3646 LogicVRegister Simulator::uabal(VectorFormat vform,
3647                                 LogicVRegister dst,
3648                                 const LogicVRegister& src1,
3649                                 const LogicVRegister& src2) {
3650   SimVRegister temp1, temp2;
3651   uxtl(vform, temp1, src1);
3652   uxtl(vform, temp2, src2);
3653   uaba(vform, dst, temp1, temp2);
3654   return dst;
3655 }
3656 
3657 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3658 LogicVRegister Simulator::uabal2(VectorFormat vform,
3659                                  LogicVRegister dst,
3660                                  const LogicVRegister& src1,
3661                                  const LogicVRegister& src2) {
3662   SimVRegister temp1, temp2;
3663   uxtl2(vform, temp1, src1);
3664   uxtl2(vform, temp2, src2);
3665   uaba(vform, dst, temp1, temp2);
3666   return dst;
3667 }
3668 
3669 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3670 LogicVRegister Simulator::sabal(VectorFormat vform,
3671                                 LogicVRegister dst,
3672                                 const LogicVRegister& src1,
3673                                 const LogicVRegister& src2) {
3674   SimVRegister temp1, temp2;
3675   sxtl(vform, temp1, src1);
3676   sxtl(vform, temp2, src2);
3677   saba(vform, dst, temp1, temp2);
3678   return dst;
3679 }
3680 
3681 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3682 LogicVRegister Simulator::sabal2(VectorFormat vform,
3683                                  LogicVRegister dst,
3684                                  const LogicVRegister& src1,
3685                                  const LogicVRegister& src2) {
3686   SimVRegister temp1, temp2;
3687   sxtl2(vform, temp1, src1);
3688   sxtl2(vform, temp2, src2);
3689   saba(vform, dst, temp1, temp2);
3690   return dst;
3691 }
3692 
3693 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3694 LogicVRegister Simulator::uabdl(VectorFormat vform,
3695                                 LogicVRegister dst,
3696                                 const LogicVRegister& src1,
3697                                 const LogicVRegister& src2) {
3698   SimVRegister temp1, temp2;
3699   uxtl(vform, temp1, src1);
3700   uxtl(vform, temp2, src2);
3701   absdiff(vform, dst, temp1, temp2, false);
3702   return dst;
3703 }
3704 
3705 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3706 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3707                                  LogicVRegister dst,
3708                                  const LogicVRegister& src1,
3709                                  const LogicVRegister& src2) {
3710   SimVRegister temp1, temp2;
3711   uxtl2(vform, temp1, src1);
3712   uxtl2(vform, temp2, src2);
3713   absdiff(vform, dst, temp1, temp2, false);
3714   return dst;
3715 }
3716 
3717 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3718 LogicVRegister Simulator::sabdl(VectorFormat vform,
3719                                 LogicVRegister dst,
3720                                 const LogicVRegister& src1,
3721                                 const LogicVRegister& src2) {
3722   SimVRegister temp1, temp2;
3723   sxtl(vform, temp1, src1);
3724   sxtl(vform, temp2, src2);
3725   absdiff(vform, dst, temp1, temp2, true);
3726   return dst;
3727 }
3728 
3729 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3730 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3731                                  LogicVRegister dst,
3732                                  const LogicVRegister& src1,
3733                                  const LogicVRegister& src2) {
3734   SimVRegister temp1, temp2;
3735   sxtl2(vform, temp1, src1);
3736   sxtl2(vform, temp2, src2);
3737   absdiff(vform, dst, temp1, temp2, true);
3738   return dst;
3739 }
3740 
3741 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3742 LogicVRegister Simulator::umull(VectorFormat vform,
3743                                 LogicVRegister dst,
3744                                 const LogicVRegister& src1,
3745                                 const LogicVRegister& src2,
3746                                 bool is_2) {
3747   SimVRegister temp1, temp2;
3748   uxtl(vform, temp1, src1, is_2);
3749   uxtl(vform, temp2, src2, is_2);
3750   mul(vform, dst, temp1, temp2);
3751   return dst;
3752 }
3753 
3754 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3755 LogicVRegister Simulator::umull2(VectorFormat vform,
3756                                  LogicVRegister dst,
3757                                  const LogicVRegister& src1,
3758                                  const LogicVRegister& src2) {
3759   return umull(vform, dst, src1, src2, /* is_2 = */ true);
3760 }
3761 
3762 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3763 LogicVRegister Simulator::smull(VectorFormat vform,
3764                                 LogicVRegister dst,
3765                                 const LogicVRegister& src1,
3766                                 const LogicVRegister& src2,
3767                                 bool is_2) {
3768   SimVRegister temp1, temp2;
3769   sxtl(vform, temp1, src1, is_2);
3770   sxtl(vform, temp2, src2, is_2);
3771   mul(vform, dst, temp1, temp2);
3772   return dst;
3773 }
3774 
3775 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3776 LogicVRegister Simulator::smull2(VectorFormat vform,
3777                                  LogicVRegister dst,
3778                                  const LogicVRegister& src1,
3779                                  const LogicVRegister& src2) {
3780   return smull(vform, dst, src1, src2, /* is_2 = */ true);
3781 }
3782 
3783 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3784 LogicVRegister Simulator::umlsl(VectorFormat vform,
3785                                 LogicVRegister dst,
3786                                 const LogicVRegister& src1,
3787                                 const LogicVRegister& src2,
3788                                 bool is_2) {
3789   SimVRegister temp1, temp2;
3790   uxtl(vform, temp1, src1, is_2);
3791   uxtl(vform, temp2, src2, is_2);
3792   mls(vform, dst, dst, temp1, temp2);
3793   return dst;
3794 }
3795 
3796 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3797 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3798                                  LogicVRegister dst,
3799                                  const LogicVRegister& src1,
3800                                  const LogicVRegister& src2) {
3801   return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3802 }
3803 
3804 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3805 LogicVRegister Simulator::smlsl(VectorFormat vform,
3806                                 LogicVRegister dst,
3807                                 const LogicVRegister& src1,
3808                                 const LogicVRegister& src2,
3809                                 bool is_2) {
3810   SimVRegister temp1, temp2;
3811   sxtl(vform, temp1, src1, is_2);
3812   sxtl(vform, temp2, src2, is_2);
3813   mls(vform, dst, dst, temp1, temp2);
3814   return dst;
3815 }
3816 
3817 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3818 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3819                                  LogicVRegister dst,
3820                                  const LogicVRegister& src1,
3821                                  const LogicVRegister& src2) {
3822   return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3823 }
3824 
3825 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3826 LogicVRegister Simulator::umlal(VectorFormat vform,
3827                                 LogicVRegister dst,
3828                                 const LogicVRegister& src1,
3829                                 const LogicVRegister& src2,
3830                                 bool is_2) {
3831   SimVRegister temp1, temp2;
3832   uxtl(vform, temp1, src1, is_2);
3833   uxtl(vform, temp2, src2, is_2);
3834   mla(vform, dst, dst, temp1, temp2);
3835   return dst;
3836 }
3837 
3838 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3839 LogicVRegister Simulator::umlal2(VectorFormat vform,
3840                                  LogicVRegister dst,
3841                                  const LogicVRegister& src1,
3842                                  const LogicVRegister& src2) {
3843   return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3844 }
3845 
3846 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3847 LogicVRegister Simulator::smlal(VectorFormat vform,
3848                                 LogicVRegister dst,
3849                                 const LogicVRegister& src1,
3850                                 const LogicVRegister& src2,
3851                                 bool is_2) {
3852   SimVRegister temp1, temp2;
3853   sxtl(vform, temp1, src1, is_2);
3854   sxtl(vform, temp2, src2, is_2);
3855   mla(vform, dst, dst, temp1, temp2);
3856   return dst;
3857 }
3858 
3859 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3860 LogicVRegister Simulator::smlal2(VectorFormat vform,
3861                                  LogicVRegister dst,
3862                                  const LogicVRegister& src1,
3863                                  const LogicVRegister& src2) {
3864   return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3865 }
3866 
3867 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3868 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3869                                   LogicVRegister dst,
3870                                   const LogicVRegister& src1,
3871                                   const LogicVRegister& src2,
3872                                   bool is_2) {
3873   SimVRegister temp;
3874   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3875   return add(vform, dst, dst, product).SignedSaturate(vform);
3876 }
3877 
3878 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3879 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3880                                    LogicVRegister dst,
3881                                    const LogicVRegister& src1,
3882                                    const LogicVRegister& src2) {
3883   return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3884 }
3885 
3886 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3887 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3888                                   LogicVRegister dst,
3889                                   const LogicVRegister& src1,
3890                                   const LogicVRegister& src2,
3891                                   bool is_2) {
3892   SimVRegister temp;
3893   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3894   return sub(vform, dst, dst, product).SignedSaturate(vform);
3895 }
3896 
3897 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3898 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3899                                    LogicVRegister dst,
3900                                    const LogicVRegister& src1,
3901                                    const LogicVRegister& src2) {
3902   return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3903 }
3904 
3905 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3906 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3907                                   LogicVRegister dst,
3908                                   const LogicVRegister& src1,
3909                                   const LogicVRegister& src2,
3910                                   bool is_2) {
3911   SimVRegister temp;
3912   LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3913   return add(vform, dst, product, product).SignedSaturate(vform);
3914 }
3915 
3916 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3917 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3918                                    LogicVRegister dst,
3919                                    const LogicVRegister& src1,
3920                                    const LogicVRegister& src2) {
3921   return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3922 }
3923 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3924 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3925                                    LogicVRegister dst,
3926                                    const LogicVRegister& src1,
3927                                    const LogicVRegister& src2,
3928                                    bool round) {
3929   int esize = LaneSizeInBitsFromFormat(vform);
3930 
3931   SimVRegister temp_lo, temp_hi;
3932 
3933   // Compute low and high multiplication results.
3934   mul(vform, temp_lo, src1, src2);
3935   smulh(vform, temp_hi, src1, src2);
3936 
3937   // Double by shifting high half, and adding in most-significant bit of low
3938   // half.
3939   shl(vform, temp_hi, temp_hi, 1);
3940   usra(vform, temp_hi, temp_lo, esize - 1);
3941 
3942   if (round) {
3943     // Add the second (due to doubling) most-significant bit of the low half
3944     // into the result.
3945     shl(vform, temp_lo, temp_lo, 1);
3946     usra(vform, temp_hi, temp_lo, esize - 1);
3947   }
3948 
3949   SimPRegister not_sat;
3950   LogicPRegister ptemp(not_sat);
3951   dst.ClearForWrite(vform);
3952   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3953     // Saturation only occurs when src1 = src2 = minimum representable value.
3954     // Check this as a special case.
3955     ptemp.SetActive(vform, i, true);
3956     if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3957         (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3958       ptemp.SetActive(vform, i, false);
3959     }
3960     dst.SetInt(vform, i, MaxIntFromFormat(vform));
3961   }
3962 
3963   mov_merging(vform, dst, not_sat, temp_hi);
3964   return dst;
3965 }
3966 
3967 
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)3968 LogicVRegister Simulator::dot(VectorFormat vform,
3969                               LogicVRegister dst,
3970                               const LogicVRegister& src1,
3971                               const LogicVRegister& src2,
3972                               bool is_src1_signed,
3973                               bool is_src2_signed) {
3974   VectorFormat quarter_vform =
3975       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3976 
3977   dst.ClearForWrite(vform);
3978   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3979     uint64_t result = 0;
3980     int64_t element1, element2;
3981     for (int i = 0; i < 4; i++) {
3982       int index = 4 * e + i;
3983       if (is_src1_signed) {
3984         element1 = src1.Int(quarter_vform, index);
3985       } else {
3986         element1 = src1.Uint(quarter_vform, index);
3987       }
3988       if (is_src2_signed) {
3989         element2 = src2.Int(quarter_vform, index);
3990       } else {
3991         element2 = src2.Uint(quarter_vform, index);
3992       }
3993       result += element1 * element2;
3994     }
3995     dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996   }
3997   return dst;
3998 }
3999 
4000 
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002                                LogicVRegister dst,
4003                                const LogicVRegister& src1,
4004                                const LogicVRegister& src2) {
4005   return dot(vform, dst, src1, src2, true, true);
4006 }
4007 
4008 
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010                                LogicVRegister dst,
4011                                const LogicVRegister& src1,
4012                                const LogicVRegister& src2) {
4013   return dot(vform, dst, src1, src2, false, false);
4014 }
4015 
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4016 LogicVRegister Simulator::usdot(VectorFormat vform,
4017                                 LogicVRegister dst,
4018                                 const LogicVRegister& src1,
4019                                 const LogicVRegister& src2) {
4020   return dot(vform, dst, src1, src2, false, true);
4021 }
4022 
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4023 LogicVRegister Simulator::cdot(VectorFormat vform,
4024                                LogicVRegister dst,
4025                                const LogicVRegister& acc,
4026                                const LogicVRegister& src1,
4027                                const LogicVRegister& src2,
4028                                int rot) {
4029   VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4030   VectorFormat quarter_vform =
4031       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4032 
4033   int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4034   int sel_b = 1 - sel_a;
4035   int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4036 
4037   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4038     int64_t result = acc.Int(vform, i);
4039     for (int j = 0; j < 2; j++) {
4040       int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4041       int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4042       int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4043       int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4044       result += (r1 * r2) + (sub_i * i1 * i2);
4045     }
4046     dst.SetInt(vform, i, result);
4047   }
4048   return dst;
4049 }
4050 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4051 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4052                                     LogicVRegister dst,
4053                                     const LogicVRegister& srca,
4054                                     const LogicVRegister& src1,
4055                                     const LogicVRegister& src2,
4056                                     int rot) {
4057   SimVRegister src1_a, src1_b;
4058   SimVRegister src2_a, src2_b;
4059   SimVRegister srca_i, srca_r;
4060   SimVRegister zero, temp;
4061   zero.Clear();
4062 
4063   if ((rot == 0) || (rot == 180)) {
4064     uzp1(vform, src1_a, src1, zero);
4065     uzp1(vform, src2_a, src2, zero);
4066     uzp2(vform, src2_b, src2, zero);
4067   } else {
4068     uzp2(vform, src1_a, src1, zero);
4069     uzp2(vform, src2_a, src2, zero);
4070     uzp1(vform, src2_b, src2, zero);
4071   }
4072 
4073   uzp1(vform, srca_r, srca, zero);
4074   uzp2(vform, srca_i, srca, zero);
4075 
4076   bool sub_r = (rot == 90) || (rot == 180);
4077   bool sub_i = (rot == 180) || (rot == 270);
4078 
4079   const bool round = true;
4080   sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4081   sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4082   zip1(vform, dst, srca_r, srca_i);
4083   return dst;
4084 }
4085 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4086 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4087                                     LogicVRegister dst,
4088                                     const LogicVRegister& srca,
4089                                     const LogicVRegister& src1,
4090                                     const LogicVRegister& src2,
4091                                     int index,
4092                                     int rot) {
4093   SimVRegister temp;
4094   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4095   return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4096 }
4097 
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4098 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4099                                       LogicVRegister dst,
4100                                       const LogicVRegister& src1,
4101                                       const LogicVRegister& src2,
4102                                       bool round,
4103                                       bool sub_op) {
4104   // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4105   // To avoid this, we use:
4106   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4107   // which is same as:
4108   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4109 
4110   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111   int esize = kDRegSize;
4112   vixl_uint128_t round_const, accum;
4113   round_const.first = 0;
4114   if (round) {
4115     round_const.second = UINT64_C(1) << (esize - 2);
4116   } else {
4117     round_const.second = 0;
4118   }
4119 
4120   dst.ClearForWrite(vform);
4121   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122     // Shift the whole value left by `esize - 1` bits.
4123     accum.first = dst.Int(vform, i) >> 1;
4124     accum.second = dst.Int(vform, i) << (esize - 1);
4125 
4126     vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4127 
4128     if (sub_op) {
4129       product = Neg128(product);
4130     }
4131     accum = Add128(accum, product);
4132 
4133     // Perform rounding.
4134     accum = Add128(accum, round_const);
4135 
4136     // Arithmetic shift the whole value right by `esize - 1` bits.
4137     accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4138     accum.first = -(accum.first >> (esize - 1));
4139 
4140     // Perform saturation.
4141     bool is_pos = (accum.first == 0) ? true : false;
4142     if (is_pos &&
4143         (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4144       accum.second = MaxIntFromFormat(vform);
4145     } else if (!is_pos && (accum.second <
4146                            static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4147       accum.second = MinIntFromFormat(vform);
4148     }
4149 
4150     dst.SetInt(vform, i, accum.second);
4151   }
4152 
4153   return dst;
4154 }
4155 
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4156 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4157                                     LogicVRegister dst,
4158                                     const LogicVRegister& src1,
4159                                     const LogicVRegister& src2,
4160                                     bool round,
4161                                     bool sub_op) {
4162   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4163   // To avoid this, we use:
4164   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4165   // which is same as:
4166   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4167 
4168   if (vform == kFormatVnD) {
4169     return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4170   }
4171 
4172   int esize = LaneSizeInBitsFromFormat(vform);
4173   int round_const = round ? (1 << (esize - 2)) : 0;
4174   int64_t accum;
4175 
4176   dst.ClearForWrite(vform);
4177   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4178     accum = dst.Int(vform, i) << (esize - 1);
4179     if (sub_op) {
4180       accum -= src1.Int(vform, i) * src2.Int(vform, i);
4181     } else {
4182       accum += src1.Int(vform, i) * src2.Int(vform, i);
4183     }
4184     accum += round_const;
4185     accum = accum >> (esize - 1);
4186 
4187     if (accum > MaxIntFromFormat(vform)) {
4188       accum = MaxIntFromFormat(vform);
4189     } else if (accum < MinIntFromFormat(vform)) {
4190       accum = MinIntFromFormat(vform);
4191     }
4192     dst.SetInt(vform, i, accum);
4193   }
4194   return dst;
4195 }
4196 
4197 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4198 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4199                                    LogicVRegister dst,
4200                                    const LogicVRegister& src1,
4201                                    const LogicVRegister& src2,
4202                                    bool round) {
4203   return sqrdmlash(vform, dst, src1, src2, round, false);
4204 }
4205 
4206 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4207 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4208                                    LogicVRegister dst,
4209                                    const LogicVRegister& src1,
4210                                    const LogicVRegister& src2,
4211                                    bool round) {
4212   return sqrdmlash(vform, dst, src1, src2, round, true);
4213 }
4214 
4215 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4216 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4217                                   LogicVRegister dst,
4218                                   const LogicVRegister& src1,
4219                                   const LogicVRegister& src2) {
4220   return sqrdmulh(vform, dst, src1, src2, false);
4221 }
4222 
4223 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4224 LogicVRegister Simulator::addhn(VectorFormat vform,
4225                                 LogicVRegister dst,
4226                                 const LogicVRegister& src1,
4227                                 const LogicVRegister& src2) {
4228   SimVRegister temp;
4229   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4230   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4231   return dst;
4232 }
4233 
4234 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4235 LogicVRegister Simulator::addhn2(VectorFormat vform,
4236                                  LogicVRegister dst,
4237                                  const LogicVRegister& src1,
4238                                  const LogicVRegister& src2) {
4239   SimVRegister temp;
4240   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4241   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4242   return dst;
4243 }
4244 
4245 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4246 LogicVRegister Simulator::raddhn(VectorFormat vform,
4247                                  LogicVRegister dst,
4248                                  const LogicVRegister& src1,
4249                                  const LogicVRegister& src2) {
4250   SimVRegister temp;
4251   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4252   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4253   return dst;
4254 }
4255 
4256 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4257 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4258                                   LogicVRegister dst,
4259                                   const LogicVRegister& src1,
4260                                   const LogicVRegister& src2) {
4261   SimVRegister temp;
4262   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4263   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4264   return dst;
4265 }
4266 
4267 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::subhn(VectorFormat vform,
4269                                 LogicVRegister dst,
4270                                 const LogicVRegister& src1,
4271                                 const LogicVRegister& src2) {
4272   SimVRegister temp;
4273   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4274   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4275   return dst;
4276 }
4277 
4278 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4279 LogicVRegister Simulator::subhn2(VectorFormat vform,
4280                                  LogicVRegister dst,
4281                                  const LogicVRegister& src1,
4282                                  const LogicVRegister& src2) {
4283   SimVRegister temp;
4284   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4285   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4286   return dst;
4287 }
4288 
4289 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4290 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4291                                  LogicVRegister dst,
4292                                  const LogicVRegister& src1,
4293                                  const LogicVRegister& src2) {
4294   SimVRegister temp;
4295   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4296   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4297   return dst;
4298 }
4299 
4300 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4301 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4302                                   LogicVRegister dst,
4303                                   const LogicVRegister& src1,
4304                                   const LogicVRegister& src2) {
4305   SimVRegister temp;
4306   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4307   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4308   return dst;
4309 }
4310 
4311 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4312 LogicVRegister Simulator::trn1(VectorFormat vform,
4313                                LogicVRegister dst,
4314                                const LogicVRegister& src1,
4315                                const LogicVRegister& src2) {
4316   uint64_t result[kZRegMaxSizeInBytes] = {};
4317   int lane_count = LaneCountFromFormat(vform);
4318   int pairs = lane_count / 2;
4319   for (int i = 0; i < pairs; ++i) {
4320     result[2 * i] = src1.Uint(vform, 2 * i);
4321     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4322   }
4323 
4324   dst.ClearForWrite(vform);
4325   for (int i = 0; i < lane_count; ++i) {
4326     dst.SetUint(vform, i, result[i]);
4327   }
4328   return dst;
4329 }
4330 
4331 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4332 LogicVRegister Simulator::trn2(VectorFormat vform,
4333                                LogicVRegister dst,
4334                                const LogicVRegister& src1,
4335                                const LogicVRegister& src2) {
4336   uint64_t result[kZRegMaxSizeInBytes] = {};
4337   int lane_count = LaneCountFromFormat(vform);
4338   int pairs = lane_count / 2;
4339   for (int i = 0; i < pairs; ++i) {
4340     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4341     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4342   }
4343 
4344   dst.ClearForWrite(vform);
4345   for (int i = 0; i < lane_count; ++i) {
4346     dst.SetUint(vform, i, result[i]);
4347   }
4348   return dst;
4349 }
4350 
4351 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4352 LogicVRegister Simulator::zip1(VectorFormat vform,
4353                                LogicVRegister dst,
4354                                const LogicVRegister& src1,
4355                                const LogicVRegister& src2) {
4356   uint64_t result[kZRegMaxSizeInBytes] = {};
4357   int lane_count = LaneCountFromFormat(vform);
4358   int pairs = lane_count / 2;
4359   for (int i = 0; i < pairs; ++i) {
4360     result[2 * i] = src1.Uint(vform, i);
4361     result[(2 * i) + 1] = src2.Uint(vform, i);
4362   }
4363 
4364   dst.ClearForWrite(vform);
4365   for (int i = 0; i < lane_count; ++i) {
4366     dst.SetUint(vform, i, result[i]);
4367   }
4368   return dst;
4369 }
4370 
4371 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4372 LogicVRegister Simulator::zip2(VectorFormat vform,
4373                                LogicVRegister dst,
4374                                const LogicVRegister& src1,
4375                                const LogicVRegister& src2) {
4376   uint64_t result[kZRegMaxSizeInBytes] = {};
4377   int lane_count = LaneCountFromFormat(vform);
4378   int pairs = lane_count / 2;
4379   for (int i = 0; i < pairs; ++i) {
4380     result[2 * i] = src1.Uint(vform, pairs + i);
4381     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4382   }
4383 
4384   dst.ClearForWrite(vform);
4385   for (int i = 0; i < lane_count; ++i) {
4386     dst.SetUint(vform, i, result[i]);
4387   }
4388   return dst;
4389 }
4390 
4391 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4392 LogicVRegister Simulator::uzp1(VectorFormat vform,
4393                                LogicVRegister dst,
4394                                const LogicVRegister& src1,
4395                                const LogicVRegister& src2) {
4396   uint64_t result[kZRegMaxSizeInBytes * 2];
4397   int lane_count = LaneCountFromFormat(vform);
4398   for (int i = 0; i < lane_count; ++i) {
4399     result[i] = src1.Uint(vform, i);
4400     result[lane_count + i] = src2.Uint(vform, i);
4401   }
4402 
4403   dst.ClearForWrite(vform);
4404   for (int i = 0; i < lane_count; ++i) {
4405     dst.SetUint(vform, i, result[2 * i]);
4406   }
4407   return dst;
4408 }
4409 
4410 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4411 LogicVRegister Simulator::uzp2(VectorFormat vform,
4412                                LogicVRegister dst,
4413                                const LogicVRegister& src1,
4414                                const LogicVRegister& src2) {
4415   uint64_t result[kZRegMaxSizeInBytes * 2];
4416   int lane_count = LaneCountFromFormat(vform);
4417   for (int i = 0; i < lane_count; ++i) {
4418     result[i] = src1.Uint(vform, i);
4419     result[lane_count + i] = src2.Uint(vform, i);
4420   }
4421 
4422   dst.ClearForWrite(vform);
4423   for (int i = 0; i < lane_count; ++i) {
4424     dst.SetUint(vform, i, result[(2 * i) + 1]);
4425   }
4426   return dst;
4427 }
4428 
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4430                                                 LogicVRegister dst,
4431                                                 const LogicVRegister& src) {
4432   // Interleave the top and bottom half of a vector, ie. for a vector:
4433   //
4434   //   [ ... | F | D | B | ... | E | C | A ]
4435   //
4436   // where B is the first element in the top half of the vector, produce a
4437   // result vector:
4438   //
4439   //   [ ... | ... | F | E | D | C | B | A ]
4440 
4441   uint64_t result[kZRegMaxSizeInBytes] = {};
4442   int lane_count = LaneCountFromFormat(vform);
4443   for (int i = 0; i < lane_count; i += 2) {
4444     result[i] = src.Uint(vform, i / 2);
4445     result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4446   }
4447   dst.SetUintArray(vform, result);
4448   return dst;
4449 }
4450 
4451 template <typename T>
FPNeg(T op)4452 T Simulator::FPNeg(T op) {
4453   return -op;
4454 }
4455 
4456 template <typename T>
FPAdd(T op1,T op2)4457 T Simulator::FPAdd(T op1, T op2) {
4458   T result = FPProcessNaNs(op1, op2);
4459   if (IsNaN(result)) {
4460     return result;
4461   }
4462 
4463   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4464     // inf + -inf returns the default NaN.
4465     FPProcessException();
4466     return FPDefaultNaN<T>();
4467   } else {
4468     // Other cases should be handled by standard arithmetic.
4469     return op1 + op2;
4470   }
4471 }
4472 
4473 
4474 template <typename T>
FPSub(T op1,T op2)4475 T Simulator::FPSub(T op1, T op2) {
4476   // NaNs should be handled elsewhere.
4477   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4478 
4479   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4480     // inf - inf returns the default NaN.
4481     FPProcessException();
4482     return FPDefaultNaN<T>();
4483   } else {
4484     // Other cases should be handled by standard arithmetic.
4485     return op1 - op2;
4486   }
4487 }
4488 
4489 template <typename T>
FPMulNaNs(T op1,T op2)4490 T Simulator::FPMulNaNs(T op1, T op2) {
4491   T result = FPProcessNaNs(op1, op2);
4492   return IsNaN(result) ? result : FPMul(op1, op2);
4493 }
4494 
4495 template <typename T>
FPMul(T op1,T op2)4496 T Simulator::FPMul(T op1, T op2) {
4497   // NaNs should be handled elsewhere.
4498   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4499 
4500   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4501     // inf * 0.0 returns the default NaN.
4502     FPProcessException();
4503     return FPDefaultNaN<T>();
4504   } else {
4505     // Other cases should be handled by standard arithmetic.
4506     return op1 * op2;
4507   }
4508 }
4509 
4510 
4511 template <typename T>
FPMulx(T op1,T op2)4512 T Simulator::FPMulx(T op1, T op2) {
4513   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4514     // inf * 0.0 returns +/-2.0.
4515     T two = 2.0;
4516     return copysign(1.0, op1) * copysign(1.0, op2) * two;
4517   }
4518   return FPMul(op1, op2);
4519 }
4520 
4521 
4522 template <typename T>
FPMulAdd(T a,T op1,T op2)4523 T Simulator::FPMulAdd(T a, T op1, T op2) {
4524   T result = FPProcessNaNs3(a, op1, op2);
4525 
4526   T sign_a = copysign(1.0, a);
4527   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4528   bool isinf_prod = IsInf(op1) || IsInf(op2);
4529   bool operation_generates_nan =
4530       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4531       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4532       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4533 
4534   if (IsNaN(result)) {
4535     // Generated NaNs override quiet NaNs propagated from a.
4536     if (operation_generates_nan && IsQuietNaN(a)) {
4537       FPProcessException();
4538       return FPDefaultNaN<T>();
4539     } else {
4540       return result;
4541     }
4542   }
4543 
4544   // If the operation would produce a NaN, return the default NaN.
4545   if (operation_generates_nan) {
4546     FPProcessException();
4547     return FPDefaultNaN<T>();
4548   }
4549 
4550   // Work around broken fma implementations for exact zero results: The sign of
4551   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4552   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4553     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4554   }
4555 
4556   result = FusedMultiplyAdd(op1, op2, a);
4557   VIXL_ASSERT(!IsNaN(result));
4558 
4559   // Work around broken fma implementations for rounded zero results: If a is
4560   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4561   if ((a == 0.0) && (result == 0.0)) {
4562     return copysign(0.0, sign_prod);
4563   }
4564 
4565   return result;
4566 }
4567 
4568 template float Simulator::FPMulAdd(float a, float op1, float op2);
4569 
4570 template double Simulator::FPMulAdd(double a, double op1, double op2);
4571 
4572 template <typename T>
FPDiv(T op1,T op2)4573 T Simulator::FPDiv(T op1, T op2) {
4574   // NaNs should be handled elsewhere.
4575   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4576 
4577   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4578     // inf / inf and 0.0 / 0.0 return the default NaN.
4579     FPProcessException();
4580     return FPDefaultNaN<T>();
4581   } else {
4582     if (op2 == 0.0) {
4583       FPProcessException();
4584       if (!IsNaN(op1)) {
4585         double op1_sign = copysign(1.0, op1);
4586         double op2_sign = copysign(1.0, op2);
4587         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4588       }
4589     }
4590 
4591     // Other cases should be handled by standard arithmetic.
4592     return op1 / op2;
4593   }
4594 }
4595 
4596 
4597 template <typename T>
FPSqrt(T op)4598 T Simulator::FPSqrt(T op) {
4599   if (IsNaN(op)) {
4600     return FPProcessNaN(op);
4601   } else if (op < T(0.0)) {
4602     FPProcessException();
4603     return FPDefaultNaN<T>();
4604   } else {
4605     return sqrt(op);
4606   }
4607 }
4608 
4609 
4610 template <typename T>
FPMax(T a,T b)4611 T Simulator::FPMax(T a, T b) {
4612   T result = FPProcessNaNs(a, b);
4613   if (IsNaN(result)) return result;
4614 
4615   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4616     // a and b are zero, and the sign differs: return +0.0.
4617     return 0.0;
4618   } else {
4619     return (a > b) ? a : b;
4620   }
4621 }
4622 
4623 
4624 template <typename T>
FPMaxNM(T a,T b)4625 T Simulator::FPMaxNM(T a, T b) {
4626   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4627     a = kFP64NegativeInfinity;
4628   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4629     b = kFP64NegativeInfinity;
4630   }
4631 
4632   T result = FPProcessNaNs(a, b);
4633   return IsNaN(result) ? result : FPMax(a, b);
4634 }
4635 
4636 
4637 template <typename T>
FPMin(T a,T b)4638 T Simulator::FPMin(T a, T b) {
4639   T result = FPProcessNaNs(a, b);
4640   if (IsNaN(result)) return result;
4641 
4642   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4643     // a and b are zero, and the sign differs: return -0.0.
4644     return -0.0;
4645   } else {
4646     return (a < b) ? a : b;
4647   }
4648 }
4649 
4650 
4651 template <typename T>
FPMinNM(T a,T b)4652 T Simulator::FPMinNM(T a, T b) {
4653   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4654     a = kFP64PositiveInfinity;
4655   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4656     b = kFP64PositiveInfinity;
4657   }
4658 
4659   T result = FPProcessNaNs(a, b);
4660   return IsNaN(result) ? result : FPMin(a, b);
4661 }
4662 
4663 
4664 template <typename T>
FPRecipStepFused(T op1,T op2)4665 T Simulator::FPRecipStepFused(T op1, T op2) {
4666   const T two = 2.0;
4667   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4668     return two;
4669   } else if (IsInf(op1) || IsInf(op2)) {
4670     // Return +inf if signs match, otherwise -inf.
4671     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4672                                           : kFP64NegativeInfinity;
4673   } else {
4674     return FusedMultiplyAdd(op1, op2, two);
4675   }
4676 }
4677 
// Returns true if value is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4682 
4683 template <>
IsNormal(SimFloat16 value)4684 bool IsNormal(SimFloat16 value) {
4685   uint16_t rawbits = Float16ToRawbits(value);
4686   uint16_t exp_mask = 0x7c00;
4687   // Check that the exponent is neither all zeroes or all ones.
4688   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4689 }
4690 
4691 
4692 template <typename T>
FPRSqrtStepFused(T op1,T op2)4693 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4694   const T one_point_five = 1.5;
4695   const T two = 2.0;
4696 
4697   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4698     return one_point_five;
4699   } else if (IsInf(op1) || IsInf(op2)) {
4700     // Return +inf if signs match, otherwise -inf.
4701     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4702                                           : kFP64NegativeInfinity;
4703   } else {
4704     // The multiply-add-halve operation must be fully fused, so avoid interim
4705     // rounding by checking which operand can be losslessly divided by two
4706     // before doing the multiply-add.
4707     if (IsNormal(op1 / two)) {
4708       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4709     } else if (IsNormal(op2 / two)) {
4710       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4711     } else {
4712       // Neither operand is normal after halving: the result is dominated by
4713       // the addition term, so just return that.
4714       return one_point_five;
4715     }
4716   }
4717 }
4718 
FPToFixedJS(double value)4719 int32_t Simulator::FPToFixedJS(double value) {
4720   // The Z-flag is set when the conversion from double precision floating-point
4721   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4722   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4723   // Z-flag is unset.
4724   int Z = 1;
4725   int32_t result;
4726 
4727   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4728       (value == kFP64NegativeInfinity)) {
4729     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4730     // unset the Z-flag.
4731     result = 0.0;
4732     if ((value != 0.0) || std::signbit(value)) {
4733       Z = 0;
4734     }
4735   } else if (std::isnan(value)) {
4736     // NaN values unset the Z-flag and set the result to 0.
4737     FPProcessNaN(value);
4738     result = 0;
4739     Z = 0;
4740   } else {
4741     // All other values are converted to an integer representation, rounded
4742     // toward zero.
4743     double int_result = std::floor(value);
4744     double error = value - int_result;
4745 
4746     if ((error != 0.0) && (int_result < 0.0)) {
4747       int_result++;
4748     }
4749 
4750     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4751     // write a one-liner with std::round, but the behaviour on ties is incorrect
4752     // for our purposes.
4753     double mod_const = static_cast<double>(UINT64_C(1) << 32);
4754     double mod_error =
4755         (int_result / mod_const) - std::floor(int_result / mod_const);
4756     double constrained;
4757     if (mod_error == 0.5) {
4758       constrained = INT32_MIN;
4759     } else {
4760       constrained = int_result - mod_const * round(int_result / mod_const);
4761     }
4762 
4763     VIXL_ASSERT(std::floor(constrained) == constrained);
4764     VIXL_ASSERT(constrained >= INT32_MIN);
4765     VIXL_ASSERT(constrained <= INT32_MAX);
4766 
4767     // Take the bottom 32 bits of the result as a 32-bit integer.
4768     result = static_cast<int32_t>(constrained);
4769 
4770     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4771         (error != 0.0)) {
4772       // If the integer result is out of range or the conversion isn't exact,
4773       // take exception and unset the Z-flag.
4774       FPProcessException();
4775       Z = 0;
4776     }
4777   }
4778 
4779   ReadNzcv().SetN(0);
4780   ReadNzcv().SetZ(Z);
4781   ReadNzcv().SetC(0);
4782   ReadNzcv().SetV(0);
4783 
4784   return result;
4785 }
4786 
FPRoundIntCommon(double value,FPRounding round_mode)4787 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4788   VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4789               (value != kFP64NegativeInfinity));
4790   VIXL_ASSERT(!IsNaN(value));
4791 
4792   double int_result = std::floor(value);
4793   double error = value - int_result;
4794   switch (round_mode) {
4795     case FPTieAway: {
4796       // Take care of correctly handling the range ]-0.5, -0.0], which must
4797       // yield -0.0.
4798       if ((-0.5 < value) && (value < 0.0)) {
4799         int_result = -0.0;
4800 
4801       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4802         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4803         // result is positive, round up.
4804         int_result++;
4805       }
4806       break;
4807     }
4808     case FPTieEven: {
4809       // Take care of correctly handling the range [-0.5, -0.0], which must
4810       // yield -0.0.
4811       if ((-0.5 <= value) && (value < 0.0)) {
4812         int_result = -0.0;
4813 
4814         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4815         // result is odd, round up.
4816       } else if ((error > 0.5) ||
4817                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4818         int_result++;
4819       }
4820       break;
4821     }
4822     case FPZero: {
4823       // If value>0 then we take floor(value)
4824       // otherwise, ceil(value).
4825       if (value < 0) {
4826         int_result = ceil(value);
4827       }
4828       break;
4829     }
4830     case FPNegativeInfinity: {
4831       // We always use floor(value).
4832       break;
4833     }
4834     case FPPositiveInfinity: {
4835       // Take care of correctly handling the range ]-1.0, -0.0], which must
4836       // yield -0.0.
4837       if ((-1.0 < value) && (value < 0.0)) {
4838         int_result = -0.0;
4839 
4840         // If the error is non-zero, round up.
4841       } else if (error > 0.0) {
4842         int_result++;
4843       }
4844       break;
4845     }
4846     default:
4847       VIXL_UNIMPLEMENTED();
4848   }
4849   return int_result;
4850 }
4851 
FPRoundInt(double value,FPRounding round_mode)4852 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4853   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4854       (value == kFP64NegativeInfinity)) {
4855     return value;
4856   } else if (IsNaN(value)) {
4857     return FPProcessNaN(value);
4858   }
4859   return FPRoundIntCommon(value, round_mode);
4860 }
4861 
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4862 double Simulator::FPRoundInt(double value,
4863                              FPRounding round_mode,
4864                              FrintMode frint_mode) {
4865   if (frint_mode == kFrintToInteger) {
4866     return FPRoundInt(value, round_mode);
4867   }
4868 
4869   VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4870 
4871   if (value == 0.0) {
4872     return value;
4873   }
4874 
4875   if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4876       IsNaN(value)) {
4877     if (frint_mode == kFrintToInt32) {
4878       return INT32_MIN;
4879     } else {
4880       return INT64_MIN;
4881     }
4882   }
4883 
4884   double result = FPRoundIntCommon(value, round_mode);
4885 
4886   // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4887   // representable as a double, and is rounded to (INT64_MAX + 1) when
4888   // converted. To avoid this, we compare `result >= int64_max_plus_one`
4889   // instead; this is safe because `result` is known to be integral, and
4890   // `int64_max_plus_one` is exactly representable as a double.
4891   constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4892   VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4893                          int64_max_plus_one)) == int64_max_plus_one);
4894 
4895   if (frint_mode == kFrintToInt32) {
4896     if ((result > INT32_MAX) || (result < INT32_MIN)) {
4897       return INT32_MIN;
4898     }
4899   } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4900     return INT64_MIN;
4901   }
4902 
4903   return result;
4904 }
4905 
FPToInt16(double value,FPRounding rmode)4906 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4907   value = FPRoundInt(value, rmode);
4908   if (value >= kHMaxInt) {
4909     return kHMaxInt;
4910   } else if (value < kHMinInt) {
4911     return kHMinInt;
4912   }
4913   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4914 }
4915 
4916 
FPToInt32(double value,FPRounding rmode)4917 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4918   value = FPRoundInt(value, rmode);
4919   if (value >= kWMaxInt) {
4920     return kWMaxInt;
4921   } else if (value < kWMinInt) {
4922     return kWMinInt;
4923   }
4924   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4925 }
4926 
4927 
FPToInt64(double value,FPRounding rmode)4928 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4929   value = FPRoundInt(value, rmode);
4930   // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4931   // as a result of kMaxInt not being representable as a double.
4932   if (value >= 9223372036854775808.) {
4933     return kXMaxInt;
4934   } else if (value < kXMinInt) {
4935     return kXMinInt;
4936   }
4937   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4938 }
4939 
4940 
FPToUInt16(double value,FPRounding rmode)4941 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4942   value = FPRoundInt(value, rmode);
4943   if (value >= kHMaxUInt) {
4944     return kHMaxUInt;
4945   } else if (value < 0.0) {
4946     return 0;
4947   }
4948   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4949 }
4950 
4951 
FPToUInt32(double value,FPRounding rmode)4952 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4953   value = FPRoundInt(value, rmode);
4954   if (value >= kWMaxUInt) {
4955     return kWMaxUInt;
4956   } else if (value < 0.0) {
4957     return 0;
4958   }
4959   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4960 }
4961 
4962 
FPToUInt64(double value,FPRounding rmode)4963 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4964   value = FPRoundInt(value, rmode);
4965   // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4966   // as a result of kMaxUInt not being representable as a double.
4967   if (value >= 18446744073709551616.) {
4968     return kXMaxUInt;
4969   } else if (value < 0.0) {
4970     return 0;
4971   }
4972   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4973 }
4974 
4975 
// For each entry of NEON_FP3SAME_LIST, define the two overloads of a
// lane-wise, two-operand FP operation FN:
//  - a template over the lane type T which applies OP to each pair of lanes.
//    When PROCNAN is true, FPProcessNaNs runs first and OP is only evaluated
//    if that did not produce a NaN;
//  - a dispatcher which picks the instantiation (SimFloat16, float or double)
//    from the lane size of vform.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                  \
  template <typename T>                                            \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    dst.ClearForWrite(vform);                                      \
    const int lane_count = LaneCountFromFormat(vform);             \
    for (int lane = 0; lane < lane_count; ++lane) {                \
      T lhs = src1.Float<T>(lane);                                 \
      T rhs = src2.Float<T>(lane);                                 \
      T value;                                                     \
      if (!(PROCNAN)) {                                            \
        value = OP(lhs, rhs);                                      \
      } else {                                                     \
        value = FPProcessNaNs(lhs, rhs);                           \
        if (!IsNaN(value)) {                                       \
          value = OP(lhs, rhs);                                    \
        }                                                          \
      }                                                            \
      dst.SetFloat(vform, lane, value);                            \
    }                                                              \
    return dst;                                                    \
  }                                                                \
                                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    const auto lane_bits = LaneSizeInBitsFromFormat(vform);        \
    if (lane_bits == kHRegSize) {                                  \
      FN<SimFloat16>(vform, dst, src1, src2);                      \
    } else if (lane_bits == kSRegSize) {                           \
      FN<float>(vform, dst, src1, src2);                           \
    } else {                                                       \
      VIXL_ASSERT(lane_bits == kDRegSize);                         \
      FN<double>(vform, dst, src1, src2);                          \
    }                                                              \
    return dst;                                                    \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5016 
5017 
5018 LogicVRegister Simulator::fnmul(VectorFormat vform,
5019                                 LogicVRegister dst,
5020                                 const LogicVRegister& src1,
5021                                 const LogicVRegister& src2) {
5022   SimVRegister temp;
5023   LogicVRegister product = fmul(vform, temp, src1, src2);
5024   return fneg(vform, dst, product);
5025 }
5026 
5027 
5028 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5029 LogicVRegister Simulator::frecps(VectorFormat vform,
5030                                  LogicVRegister dst,
5031                                  const LogicVRegister& src1,
5032                                  const LogicVRegister& src2) {
5033   dst.ClearForWrite(vform);
5034   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5035     T op1 = -src1.Float<T>(i);
5036     T op2 = src2.Float<T>(i);
5037     T result = FPProcessNaNs(op1, op2);
5038     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5039   }
5040   return dst;
5041 }
5042 
5043 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5044 LogicVRegister Simulator::frecps(VectorFormat vform,
5045                                  LogicVRegister dst,
5046                                  const LogicVRegister& src1,
5047                                  const LogicVRegister& src2) {
5048   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5049     frecps<SimFloat16>(vform, dst, src1, src2);
5050   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5051     frecps<float>(vform, dst, src1, src2);
5052   } else {
5053     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5054     frecps<double>(vform, dst, src1, src2);
5055   }
5056   return dst;
5057 }
5058 
5059 
5060 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5061 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5062                                   LogicVRegister dst,
5063                                   const LogicVRegister& src1,
5064                                   const LogicVRegister& src2) {
5065   dst.ClearForWrite(vform);
5066   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5067     T op1 = -src1.Float<T>(i);
5068     T op2 = src2.Float<T>(i);
5069     T result = FPProcessNaNs(op1, op2);
5070     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5071   }
5072   return dst;
5073 }
5074 
5075 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5076 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5077                                   LogicVRegister dst,
5078                                   const LogicVRegister& src1,
5079                                   const LogicVRegister& src2) {
5080   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5081     frsqrts<SimFloat16>(vform, dst, src1, src2);
5082   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5083     frsqrts<float>(vform, dst, src1, src2);
5084   } else {
5085     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5086     frsqrts<double>(vform, dst, src1, src2);
5087   }
5088   return dst;
5089 }
5090 
5091 
5092 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5093 LogicVRegister Simulator::fcmp(VectorFormat vform,
5094                                LogicVRegister dst,
5095                                const LogicVRegister& src1,
5096                                const LogicVRegister& src2,
5097                                Condition cond) {
5098   dst.ClearForWrite(vform);
5099   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5100     bool result = false;
5101     T op1 = src1.Float<T>(i);
5102     T op2 = src2.Float<T>(i);
5103     bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5104 
5105     switch (cond) {
5106       case eq:
5107         result = (op1 == op2);
5108         break;
5109       case ge:
5110         result = (op1 >= op2);
5111         break;
5112       case gt:
5113         result = (op1 > op2);
5114         break;
5115       case le:
5116         result = (op1 <= op2);
5117         break;
5118       case lt:
5119         result = (op1 < op2);
5120         break;
5121       case ne:
5122         result = (op1 != op2);
5123         break;
5124       case uo:
5125         result = unordered;
5126         break;
5127       default:
5128         // Other conditions are defined in terms of those above.
5129         VIXL_UNREACHABLE();
5130         break;
5131     }
5132 
5133     if (result && unordered) {
5134       // Only `uo` and `ne` can be true for unordered comparisons.
5135       VIXL_ASSERT((cond == uo) || (cond == ne));
5136     }
5137 
5138     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5139   }
5140   return dst;
5141 }
5142 
5143 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5144 LogicVRegister Simulator::fcmp(VectorFormat vform,
5145                                LogicVRegister dst,
5146                                const LogicVRegister& src1,
5147                                const LogicVRegister& src2,
5148                                Condition cond) {
5149   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5150     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5151   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5152     fcmp<float>(vform, dst, src1, src2, cond);
5153   } else {
5154     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5155     fcmp<double>(vform, dst, src1, src2, cond);
5156   }
5157   return dst;
5158 }
5159 
5160 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5161 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5162                                     LogicVRegister dst,
5163                                     const LogicVRegister& src,
5164                                     Condition cond) {
5165   SimVRegister temp;
5166   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5167     LogicVRegister zero_reg =
5168         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5169     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5170   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5171     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5172     fcmp<float>(vform, dst, src, zero_reg, cond);
5173   } else {
5174     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5175     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5176     fcmp<double>(vform, dst, src, zero_reg, cond);
5177   }
5178   return dst;
5179 }
5180 
5181 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5182 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5183                                   LogicVRegister dst,
5184                                   const LogicVRegister& src1,
5185                                   const LogicVRegister& src2,
5186                                   Condition cond) {
5187   SimVRegister temp1, temp2;
5188   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5189     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5190     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5191     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5192   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5193     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5194     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5195     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5196   } else {
5197     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5198     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5199     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5200     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5201   }
5202   return dst;
5203 }
5204 
5205 
5206 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5207 LogicVRegister Simulator::fmla(VectorFormat vform,
5208                                LogicVRegister dst,
5209                                const LogicVRegister& srca,
5210                                const LogicVRegister& src1,
5211                                const LogicVRegister& src2) {
5212   dst.ClearForWrite(vform);
5213   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5214     T op1 = src1.Float<T>(i);
5215     T op2 = src2.Float<T>(i);
5216     T acc = srca.Float<T>(i);
5217     T result = FPMulAdd(acc, op1, op2);
5218     dst.SetFloat(vform, i, result);
5219   }
5220   return dst;
5221 }
5222 
5223 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5224 LogicVRegister Simulator::fmla(VectorFormat vform,
5225                                LogicVRegister dst,
5226                                const LogicVRegister& srca,
5227                                const LogicVRegister& src1,
5228                                const LogicVRegister& src2) {
5229   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5230     fmla<SimFloat16>(vform, dst, srca, src1, src2);
5231   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5232     fmla<float>(vform, dst, srca, src1, src2);
5233   } else {
5234     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5235     fmla<double>(vform, dst, srca, src1, src2);
5236   }
5237   return dst;
5238 }
5239 
5240 
5241 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5242 LogicVRegister Simulator::fmls(VectorFormat vform,
5243                                LogicVRegister dst,
5244                                const LogicVRegister& srca,
5245                                const LogicVRegister& src1,
5246                                const LogicVRegister& src2) {
5247   dst.ClearForWrite(vform);
5248   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5249     T op1 = -src1.Float<T>(i);
5250     T op2 = src2.Float<T>(i);
5251     T acc = srca.Float<T>(i);
5252     T result = FPMulAdd(acc, op1, op2);
5253     dst.SetFloat(i, result);
5254   }
5255   return dst;
5256 }
5257 
5258 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5259 LogicVRegister Simulator::fmls(VectorFormat vform,
5260                                LogicVRegister dst,
5261                                const LogicVRegister& srca,
5262                                const LogicVRegister& src1,
5263                                const LogicVRegister& src2) {
5264   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5265     fmls<SimFloat16>(vform, dst, srca, src1, src2);
5266   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5267     fmls<float>(vform, dst, srca, src1, src2);
5268   } else {
5269     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5270     fmls<double>(vform, dst, srca, src1, src2);
5271   }
5272   return dst;
5273 }
5274 
5275 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5276 LogicVRegister Simulator::fmlal(VectorFormat vform,
5277                                 LogicVRegister dst,
5278                                 const LogicVRegister& src1,
5279                                 const LogicVRegister& src2) {
5280   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5281   dst.ClearForWrite(vform);
5282   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5283     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5284     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5285     float acc = dst.Float<float>(i);
5286     float result = FPMulAdd(acc, op1, op2);
5287     dst.SetFloat(i, result);
5288   }
5289   return dst;
5290 }
5291 
5292 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5293 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5294                                  LogicVRegister dst,
5295                                  const LogicVRegister& src1,
5296                                  const LogicVRegister& src2) {
5297   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5298   dst.ClearForWrite(vform);
5299   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5300     int src = i + LaneCountFromFormat(vform);
5301     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5302     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5303     float acc = dst.Float<float>(i);
5304     float result = FPMulAdd(acc, op1, op2);
5305     dst.SetFloat(i, result);
5306   }
5307   return dst;
5308 }
5309 
5310 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5311 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5312                                 LogicVRegister dst,
5313                                 const LogicVRegister& src1,
5314                                 const LogicVRegister& src2) {
5315   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5316   dst.ClearForWrite(vform);
5317   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5318     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5319     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5320     float acc = dst.Float<float>(i);
5321     float result = FPMulAdd(acc, op1, op2);
5322     dst.SetFloat(i, result);
5323   }
5324   return dst;
5325 }
5326 
5327 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5328 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5329                                  LogicVRegister dst,
5330                                  const LogicVRegister& src1,
5331                                  const LogicVRegister& src2) {
5332   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5333   dst.ClearForWrite(vform);
5334   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5335     int src = i + LaneCountFromFormat(vform);
5336     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5337     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5338     float acc = dst.Float<float>(i);
5339     float result = FPMulAdd(acc, op1, op2);
5340     dst.SetFloat(i, result);
5341   }
5342   return dst;
5343 }
5344 
5345 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5346 LogicVRegister Simulator::fmlal(VectorFormat vform,
5347                                 LogicVRegister dst,
5348                                 const LogicVRegister& src1,
5349                                 const LogicVRegister& src2,
5350                                 int index) {
5351   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5352   dst.ClearForWrite(vform);
5353   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5354   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5355     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5356     float acc = dst.Float<float>(i);
5357     float result = FPMulAdd(acc, op1, op2);
5358     dst.SetFloat(i, result);
5359   }
5360   return dst;
5361 }
5362 
5363 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5364 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5365                                  LogicVRegister dst,
5366                                  const LogicVRegister& src1,
5367                                  const LogicVRegister& src2,
5368                                  int index) {
5369   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5370   dst.ClearForWrite(vform);
5371   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5372   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5373     int src = i + LaneCountFromFormat(vform);
5374     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5375     float acc = dst.Float<float>(i);
5376     float result = FPMulAdd(acc, op1, op2);
5377     dst.SetFloat(i, result);
5378   }
5379   return dst;
5380 }
5381 
5382 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5383 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5384                                 LogicVRegister dst,
5385                                 const LogicVRegister& src1,
5386                                 const LogicVRegister& src2,
5387                                 int index) {
5388   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5389   dst.ClearForWrite(vform);
5390   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5391   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5392     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5393     float acc = dst.Float<float>(i);
5394     float result = FPMulAdd(acc, op1, op2);
5395     dst.SetFloat(i, result);
5396   }
5397   return dst;
5398 }
5399 
5400 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5401 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5402                                  LogicVRegister dst,
5403                                  const LogicVRegister& src1,
5404                                  const LogicVRegister& src2,
5405                                  int index) {
5406   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5407   dst.ClearForWrite(vform);
5408   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5409   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5410     int src = i + LaneCountFromFormat(vform);
5411     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5412     float acc = dst.Float<float>(i);
5413     float result = FPMulAdd(acc, op1, op2);
5414     dst.SetFloat(i, result);
5415   }
5416   return dst;
5417 }
5418 
5419 
5420 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5421 LogicVRegister Simulator::fneg(VectorFormat vform,
5422                                LogicVRegister dst,
5423                                const LogicVRegister& src) {
5424   dst.ClearForWrite(vform);
5425   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5426     T op = src.Float<T>(i);
5427     op = -op;
5428     dst.SetFloat(i, op);
5429   }
5430   return dst;
5431 }
5432 
5433 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5434 LogicVRegister Simulator::fneg(VectorFormat vform,
5435                                LogicVRegister dst,
5436                                const LogicVRegister& src) {
5437   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5438     fneg<SimFloat16>(vform, dst, src);
5439   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5440     fneg<float>(vform, dst, src);
5441   } else {
5442     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5443     fneg<double>(vform, dst, src);
5444   }
5445   return dst;
5446 }
5447 
5448 
5449 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5450 LogicVRegister Simulator::fabs_(VectorFormat vform,
5451                                 LogicVRegister dst,
5452                                 const LogicVRegister& src) {
5453   dst.ClearForWrite(vform);
5454   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5455     T op = src.Float<T>(i);
5456     if (copysign(1.0, op) < 0.0) {
5457       op = -op;
5458     }
5459     dst.SetFloat(i, op);
5460   }
5461   return dst;
5462 }
5463 
5464 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5465 LogicVRegister Simulator::fabs_(VectorFormat vform,
5466                                 LogicVRegister dst,
5467                                 const LogicVRegister& src) {
5468   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5469     fabs_<SimFloat16>(vform, dst, src);
5470   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5471     fabs_<float>(vform, dst, src);
5472   } else {
5473     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5474     fabs_<double>(vform, dst, src);
5475   }
5476   return dst;
5477 }
5478 
5479 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5480 LogicVRegister Simulator::fabd(VectorFormat vform,
5481                                LogicVRegister dst,
5482                                const LogicVRegister& src1,
5483                                const LogicVRegister& src2) {
5484   SimVRegister temp;
5485   fsub(vform, temp, src1, src2);
5486   fabs_(vform, dst, temp);
5487   return dst;
5488 }
5489 
5490 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5491 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5492                                 LogicVRegister dst,
5493                                 const LogicVRegister& src) {
5494   dst.ClearForWrite(vform);
5495   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5496     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5497       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5498       dst.SetFloat(i, result);
5499     }
5500   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5501     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5502       float result = FPSqrt(src.Float<float>(i));
5503       dst.SetFloat(i, result);
5504     }
5505   } else {
5506     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5507     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5508       double result = FPSqrt(src.Float<double>(i));
5509       dst.SetFloat(i, result);
5510     }
5511   }
5512   return dst;
5513 }
5514 
5515 
// Generates two Simulator overloads for every entry of NEON_FPPAIRWISE_LIST:
//
//  * FNP(vform, dst, src1, src2): runs uzp1 and uzp2 on the two sources into
//    separate scratch registers, applies FN to those registers, and, for SVE
//    formats only, post-processes `dst` with interleave_top_bottom.
//  * FNP(vform, dst, src): scalar form. Applies OP to elements 0 and 1 of
//    `src` and stores the result in element 0 of `dst`; half-precision values
//    are moved as raw bits. ClearForWrite is called after the store.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src1,                    \
                                const LogicVRegister& src2) {                  \
    SimVRegister unzipped1, unzipped2;                                         \
    uzp1(vform, unzipped1, src1, src2);                                        \
    uzp2(vform, unzipped2, src1, src2);                                        \
    FN(vform, dst, unzipped1, unzipped2);                                      \
    if (IsSVEFormat(vform)) {                                                  \
      interleave_top_bottom(vform, dst, dst);                                  \
    }                                                                          \
    return dst;                                                                \
  }                                                                            \
                                                                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src) {                   \
    if (vform == kFormatH) {                                                   \
      SimFloat16 lhs(RawbitsToFloat16(src.Uint(vform, 0)));                    \
      SimFloat16 rhs(RawbitsToFloat16(src.Uint(vform, 1)));                    \
      SimFloat16 combined(OP(lhs, rhs));                                       \
      dst.SetUint(vform, 0, Float16ToRawbits(combined));                       \
    } else if (vform == kFormatS) {                                            \
      float combined = OP(src.Float<float>(0), src.Float<float>(1));           \
      dst.SetFloat(0, combined);                                               \
    } else {                                                                   \
      VIXL_ASSERT(vform == kFormatD);                                          \
      double combined = OP(src.Float<double>(0), src.Float<double>(1));        \
      dst.SetFloat(0, combined);                                               \
    }                                                                          \
    dst.ClearForWrite(vform);                                                  \
    return dst;                                                                \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5551 
5552 template <typename T>
5553 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5554                                                LogicVRegister dst,
5555                                                const LogicVRegister& src,
5556                                                typename TFPPairOp<T>::type fn,
5557                                                uint64_t inactive_value) {
5558   int lane_count = LaneCountFromFormat(vform);
5559   T result[kZRegMaxSizeInBytes / sizeof(T)];
5560   // Copy the source vector into a working array. Initialise the unused elements
5561   // at the end of the array to the same value that a false predicate would set.
5562   for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5563     result[i] = (i < lane_count)
5564                     ? src.Float<T>(i)
5565                     : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5566   }
5567 
5568   // Pairwise reduce the elements to a single value, using the pair op function
5569   // argument.
5570   for (int step = 1; step < lane_count; step *= 2) {
5571     for (int i = 0; i < lane_count; i += step * 2) {
5572       result[i] = (this->*fn)(result[i], result[i + step]);
5573     }
5574   }
5575   dst.ClearForWrite(ScalarFormatFromFormat(vform));
5576   dst.SetFloat<T>(0, result[0]);
5577   return dst;
5578 }
5579 
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5580 LogicVRegister Simulator::FPPairedAcrossHelper(
5581     VectorFormat vform,
5582     LogicVRegister dst,
5583     const LogicVRegister& src,
5584     typename TFPPairOp<SimFloat16>::type fn16,
5585     typename TFPPairOp<float>::type fn32,
5586     typename TFPPairOp<double>::type fn64,
5587     uint64_t inactive_value) {
5588   switch (LaneSizeInBitsFromFormat(vform)) {
5589     case kHRegSize:
5590       return FPPairedAcrossHelper<SimFloat16>(vform,
5591                                               dst,
5592                                               src,
5593                                               fn16,
5594                                               inactive_value);
5595     case kSRegSize:
5596       return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5597     default:
5598       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5599       return FPPairedAcrossHelper<double>(vform,
5600                                           dst,
5601                                           src,
5602                                           fn64,
5603                                           inactive_value);
5604   }
5605 }
5606 
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5607 LogicVRegister Simulator::faddv(VectorFormat vform,
5608                                 LogicVRegister dst,
5609                                 const LogicVRegister& src) {
5610   return FPPairedAcrossHelper(vform,
5611                               dst,
5612                               src,
5613                               &Simulator::FPAdd<SimFloat16>,
5614                               &Simulator::FPAdd<float>,
5615                               &Simulator::FPAdd<double>,
5616                               0);
5617 }
5618 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5619 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5620                                 LogicVRegister dst,
5621                                 const LogicVRegister& src) {
5622   int lane_size = LaneSizeInBitsFromFormat(vform);
5623   uint64_t inactive_value =
5624       FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5625   return FPPairedAcrossHelper(vform,
5626                               dst,
5627                               src,
5628                               &Simulator::FPMax<SimFloat16>,
5629                               &Simulator::FPMax<float>,
5630                               &Simulator::FPMax<double>,
5631                               inactive_value);
5632 }
5633 
5634 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5635 LogicVRegister Simulator::fminv(VectorFormat vform,
5636                                 LogicVRegister dst,
5637                                 const LogicVRegister& src) {
5638   int lane_size = LaneSizeInBitsFromFormat(vform);
5639   uint64_t inactive_value =
5640       FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5641   return FPPairedAcrossHelper(vform,
5642                               dst,
5643                               src,
5644                               &Simulator::FPMin<SimFloat16>,
5645                               &Simulator::FPMin<float>,
5646                               &Simulator::FPMin<double>,
5647                               inactive_value);
5648 }
5649 
5650 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5651 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5652                                   LogicVRegister dst,
5653                                   const LogicVRegister& src) {
5654   int lane_size = LaneSizeInBitsFromFormat(vform);
5655   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5656   return FPPairedAcrossHelper(vform,
5657                               dst,
5658                               src,
5659                               &Simulator::FPMaxNM<SimFloat16>,
5660                               &Simulator::FPMaxNM<float>,
5661                               &Simulator::FPMaxNM<double>,
5662                               inactive_value);
5663 }
5664 
5665 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5666 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5667                                   LogicVRegister dst,
5668                                   const LogicVRegister& src) {
5669   int lane_size = LaneSizeInBitsFromFormat(vform);
5670   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5671   return FPPairedAcrossHelper(vform,
5672                               dst,
5673                               src,
5674                               &Simulator::FPMinNM<SimFloat16>,
5675                               &Simulator::FPMinNM<float>,
5676                               &Simulator::FPMinNM<double>,
5677                               inactive_value);
5678 }
5679 
5680 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5681 LogicVRegister Simulator::fmul(VectorFormat vform,
5682                                LogicVRegister dst,
5683                                const LogicVRegister& src1,
5684                                const LogicVRegister& src2,
5685                                int index) {
5686   dst.ClearForWrite(vform);
5687   SimVRegister temp;
5688   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5689     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5690     fmul<SimFloat16>(vform, dst, src1, index_reg);
5691   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5692     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5693     fmul<float>(vform, dst, src1, index_reg);
5694   } else {
5695     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5696     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5697     fmul<double>(vform, dst, src1, index_reg);
5698   }
5699   return dst;
5700 }
5701 
5702 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5703 LogicVRegister Simulator::fmla(VectorFormat vform,
5704                                LogicVRegister dst,
5705                                const LogicVRegister& src1,
5706                                const LogicVRegister& src2,
5707                                int index) {
5708   dst.ClearForWrite(vform);
5709   SimVRegister temp;
5710   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5711     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5712     fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5713   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5714     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5715     fmla<float>(vform, dst, dst, src1, index_reg);
5716   } else {
5717     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5718     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5719     fmla<double>(vform, dst, dst, src1, index_reg);
5720   }
5721   return dst;
5722 }
5723 
5724 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5725 LogicVRegister Simulator::fmls(VectorFormat vform,
5726                                LogicVRegister dst,
5727                                const LogicVRegister& src1,
5728                                const LogicVRegister& src2,
5729                                int index) {
5730   dst.ClearForWrite(vform);
5731   SimVRegister temp;
5732   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5733     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5734     fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5735   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5736     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5737     fmls<float>(vform, dst, dst, src1, index_reg);
5738   } else {
5739     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5740     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5741     fmls<double>(vform, dst, dst, src1, index_reg);
5742   }
5743   return dst;
5744 }
5745 
5746 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5747 LogicVRegister Simulator::fmulx(VectorFormat vform,
5748                                 LogicVRegister dst,
5749                                 const LogicVRegister& src1,
5750                                 const LogicVRegister& src2,
5751                                 int index) {
5752   dst.ClearForWrite(vform);
5753   SimVRegister temp;
5754   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5755     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5756     fmulx<SimFloat16>(vform, dst, src1, index_reg);
5757   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5758     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5759     fmulx<float>(vform, dst, src1, index_reg);
5760   } else {
5761     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5762     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5763     fmulx<double>(vform, dst, src1, index_reg);
5764   }
5765   return dst;
5766 }
5767 
5768 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5769 LogicVRegister Simulator::frint(VectorFormat vform,
5770                                 LogicVRegister dst,
5771                                 const LogicVRegister& src,
5772                                 FPRounding rounding_mode,
5773                                 bool inexact_exception,
5774                                 FrintMode frint_mode) {
5775   dst.ClearForWrite(vform);
5776   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5777     VIXL_ASSERT(frint_mode == kFrintToInteger);
5778     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5779       SimFloat16 input = src.Float<SimFloat16>(i);
5780       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5781       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5782         FPProcessException();
5783       }
5784       dst.SetFloat<SimFloat16>(i, rounded);
5785     }
5786   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5787     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5788       float input = src.Float<float>(i);
5789       float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5790 
5791       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5792         FPProcessException();
5793       }
5794       dst.SetFloat<float>(i, rounded);
5795     }
5796   } else {
5797     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5798     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5799       double input = src.Float<double>(i);
5800       double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5801       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5802         FPProcessException();
5803       }
5804       dst.SetFloat<double>(i, rounded);
5805     }
5806   }
5807   return dst;
5808 }
5809 
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5810 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5811                                VectorFormat src_vform,
5812                                LogicVRegister dst,
5813                                const LogicPRegister& pg,
5814                                const LogicVRegister& src) {
5815   unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5816   unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5817   VectorFormat vform = SVEFormatFromLaneSizeInBits(
5818       std::max(dst_data_size_in_bits, src_data_size_in_bits));
5819 
5820   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5821     if (!pg.IsActive(vform, i)) continue;
5822 
5823     uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5824                                                       0,
5825                                                       src.Uint(vform, i));
5826     double dst_value =
5827         RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5828 
5829     uint64_t dst_raw_bits =
5830         FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5831 
5832     dst.SetUint(vform, i, dst_raw_bits);
5833   }
5834 
5835   return dst;
5836 }
5837 
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5838 LogicVRegister Simulator::fcvts(VectorFormat vform,
5839                                 unsigned dst_data_size_in_bits,
5840                                 unsigned src_data_size_in_bits,
5841                                 LogicVRegister dst,
5842                                 const LogicPRegister& pg,
5843                                 const LogicVRegister& src,
5844                                 FPRounding round,
5845                                 int fbits) {
5846   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5847   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5848 
5849   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5850     if (!pg.IsActive(vform, i)) continue;
5851 
5852     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5853                                                0,
5854                                                src.Uint(vform, i));
5855     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5856                     std::pow(2.0, fbits);
5857 
5858     switch (dst_data_size_in_bits) {
5859       case kHRegSize:
5860         dst.SetInt(vform, i, FPToInt16(result, round));
5861         break;
5862       case kSRegSize:
5863         dst.SetInt(vform, i, FPToInt32(result, round));
5864         break;
5865       case kDRegSize:
5866         dst.SetInt(vform, i, FPToInt64(result, round));
5867         break;
5868       default:
5869         VIXL_UNIMPLEMENTED();
5870         break;
5871     }
5872   }
5873 
5874   return dst;
5875 }
5876 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5877 LogicVRegister Simulator::fcvts(VectorFormat vform,
5878                                 LogicVRegister dst,
5879                                 const LogicVRegister& src,
5880                                 FPRounding round,
5881                                 int fbits) {
5882   dst.ClearForWrite(vform);
5883   return fcvts(vform,
5884                LaneSizeInBitsFromFormat(vform),
5885                LaneSizeInBitsFromFormat(vform),
5886                dst,
5887                GetPTrue(),
5888                src,
5889                round,
5890                fbits);
5891 }
5892 
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5893 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5894                                 unsigned dst_data_size_in_bits,
5895                                 unsigned src_data_size_in_bits,
5896                                 LogicVRegister dst,
5897                                 const LogicPRegister& pg,
5898                                 const LogicVRegister& src,
5899                                 FPRounding round,
5900                                 int fbits) {
5901   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5902   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5903 
5904   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5905     if (!pg.IsActive(vform, i)) continue;
5906 
5907     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5908                                                0,
5909                                                src.Uint(vform, i));
5910     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5911                     std::pow(2.0, fbits);
5912 
5913     switch (dst_data_size_in_bits) {
5914       case kHRegSize:
5915         dst.SetUint(vform, i, FPToUInt16(result, round));
5916         break;
5917       case kSRegSize:
5918         dst.SetUint(vform, i, FPToUInt32(result, round));
5919         break;
5920       case kDRegSize:
5921         dst.SetUint(vform, i, FPToUInt64(result, round));
5922         break;
5923       default:
5924         VIXL_UNIMPLEMENTED();
5925         break;
5926     }
5927   }
5928 
5929   return dst;
5930 }
5931 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5932 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5933                                 LogicVRegister dst,
5934                                 const LogicVRegister& src,
5935                                 FPRounding round,
5936                                 int fbits) {
5937   dst.ClearForWrite(vform);
5938   return fcvtu(vform,
5939                LaneSizeInBitsFromFormat(vform),
5940                LaneSizeInBitsFromFormat(vform),
5941                dst,
5942                GetPTrue(),
5943                src,
5944                round,
5945                fbits);
5946 }
5947 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5948 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5949                                 LogicVRegister dst,
5950                                 const LogicVRegister& src) {
5951   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5952     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5953       // TODO: Full support for SimFloat16 in SimRegister(s).
5954       dst.SetFloat(i,
5955                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5956                              ReadDN()));
5957     }
5958   } else {
5959     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5960     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5961       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5962     }
5963   }
5964   return dst;
5965 }
5966 
5967 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5968 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5969                                  LogicVRegister dst,
5970                                  const LogicVRegister& src) {
5971   int lane_count = LaneCountFromFormat(vform);
5972   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5973     for (int i = 0; i < lane_count; i++) {
5974       // TODO: Full support for SimFloat16 in SimRegister(s).
5975       dst.SetFloat(i,
5976                    FPToFloat(RawbitsToFloat16(
5977                                  src.Float<uint16_t>(i + lane_count)),
5978                              ReadDN()));
5979     }
5980   } else {
5981     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5982     for (int i = 0; i < lane_count; i++) {
5983       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5984     }
5985   }
5986   return dst;
5987 }
5988 
5989 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5990 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5991                                 LogicVRegister dst,
5992                                 const LogicVRegister& src) {
5993   SimVRegister tmp;
5994   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
5995   dst.ClearForWrite(vform);
5996   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5997     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5998       dst.SetFloat(i,
5999                    Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6000                                                 FPTieEven,
6001                                                 ReadDN())));
6002     }
6003   } else {
6004     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6005     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6006       dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6007     }
6008   }
6009   return dst;
6010 }
6011 
6012 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6013 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6014                                  LogicVRegister dst,
6015                                  const LogicVRegister& src) {
6016   int lane_count = LaneCountFromFormat(vform) / 2;
6017   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6018     for (int i = lane_count - 1; i >= 0; i--) {
6019       dst.SetFloat(i + lane_count,
6020                    Float16ToRawbits(
6021                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6022     }
6023   } else {
6024     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6025     for (int i = lane_count - 1; i >= 0; i--) {
6026       dst.SetFloat(i + lane_count,
6027                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6028     }
6029   }
6030   return dst;
6031 }
6032 
6033 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6034 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6035                                  LogicVRegister dst,
6036                                  const LogicVRegister& src) {
6037   SimVRegister tmp;
6038   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6039   int input_lane_count = LaneCountFromFormat(vform);
6040   if (IsSVEFormat(vform)) {
6041     mov(kFormatVnB, tmp, src);
6042     input_lane_count /= 2;
6043   }
6044 
6045   dst.ClearForWrite(vform);
6046   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6047 
6048   for (int i = 0; i < input_lane_count; i++) {
6049     dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6050   }
6051   return dst;
6052 }
6053 
6054 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6055 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6056                                   LogicVRegister dst,
6057                                   const LogicVRegister& src) {
6058   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6059   int lane_count = LaneCountFromFormat(vform) / 2;
6060   for (int i = lane_count - 1; i >= 0; i--) {
6061     dst.SetFloat(i + lane_count,
6062                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6063   }
6064   return dst;
6065 }
6066 
6067 
6068 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6069 double Simulator::recip_sqrt_estimate(double a) {
6070   int quot0, quot1, s;
6071   double r;
6072   if (a < 0.5) {
6073     quot0 = static_cast<int>(a * 512.0);
6074     r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6075   } else {
6076     quot1 = static_cast<int>(a * 256.0);
6077     r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6078   }
6079   s = static_cast<int>(256.0 * r + 0.5);
6080   return static_cast<double>(s) / 256.0;
6081 }
6082 
6083 
Bits(uint64_t val,int start_bit,int end_bit)6084 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6085   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6086 }
6087 
6088 
6089 template <typename T>
FPRecipSqrtEstimate(T op)6090 T Simulator::FPRecipSqrtEstimate(T op) {
6091   if (IsNaN(op)) {
6092     return FPProcessNaN(op);
6093   } else if (op == 0.0) {
6094     if (copysign(1.0, op) < 0.0) {
6095       return kFP64NegativeInfinity;
6096     } else {
6097       return kFP64PositiveInfinity;
6098     }
6099   } else if (copysign(1.0, op) < 0.0) {
6100     FPProcessException();
6101     return FPDefaultNaN<T>();
6102   } else if (IsInf(op)) {
6103     return 0.0;
6104   } else {
6105     uint64_t fraction;
6106     int exp, result_exp;
6107 
6108     if (IsFloat16<T>()) {
6109       exp = Float16Exp(op);
6110       fraction = Float16Mantissa(op);
6111       fraction <<= 42;
6112     } else if (IsFloat32<T>()) {
6113       exp = FloatExp(op);
6114       fraction = FloatMantissa(op);
6115       fraction <<= 29;
6116     } else {
6117       VIXL_ASSERT(IsFloat64<T>());
6118       exp = DoubleExp(op);
6119       fraction = DoubleMantissa(op);
6120     }
6121 
6122     if (exp == 0) {
6123       while (Bits(fraction, 51, 51) == 0) {
6124         fraction = Bits(fraction, 50, 0) << 1;
6125         exp -= 1;
6126       }
6127       fraction = Bits(fraction, 50, 0) << 1;
6128     }
6129 
6130     double scaled;
6131     if (Bits(exp, 0, 0) == 0) {
6132       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6133     } else {
6134       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6135     }
6136 
6137     if (IsFloat16<T>()) {
6138       result_exp = (44 - exp) / 2;
6139     } else if (IsFloat32<T>()) {
6140       result_exp = (380 - exp) / 2;
6141     } else {
6142       VIXL_ASSERT(IsFloat64<T>());
6143       result_exp = (3068 - exp) / 2;
6144     }
6145 
6146     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6147 
6148     if (IsFloat16<T>()) {
6149       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6150       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6151       return Float16Pack(0, exp_bits, est_bits);
6152     } else if (IsFloat32<T>()) {
6153       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6154       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6155       return FloatPack(0, exp_bits, est_bits);
6156     } else {
6157       VIXL_ASSERT(IsFloat64<T>());
6158       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6159     }
6160   }
6161 }
6162 
6163 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6164 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6165                                   LogicVRegister dst,
6166                                   const LogicVRegister& src) {
6167   dst.ClearForWrite(vform);
6168   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6169     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6170       SimFloat16 input = src.Float<SimFloat16>(i);
6171       dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6172     }
6173   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6174     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6175       float input = src.Float<float>(i);
6176       dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6177     }
6178   } else {
6179     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6180     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6181       double input = src.Float<double>(i);
6182       dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6183     }
6184   }
6185   return dst;
6186 }
6187 
6188 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6189 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6190   uint32_t sign;
6191 
6192   if (IsFloat16<T>()) {
6193     sign = Float16Sign(op);
6194   } else if (IsFloat32<T>()) {
6195     sign = FloatSign(op);
6196   } else {
6197     VIXL_ASSERT(IsFloat64<T>());
6198     sign = DoubleSign(op);
6199   }
6200 
6201   if (IsNaN(op)) {
6202     return FPProcessNaN(op);
6203   } else if (IsInf(op)) {
6204     return (sign == 1) ? -0.0 : 0.0;
6205   } else if (op == 0.0) {
6206     FPProcessException();  // FPExc_DivideByZero exception.
6207     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6208   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6209              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6210              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6211     bool overflow_to_inf = false;
6212     switch (rounding) {
6213       case FPTieEven:
6214         overflow_to_inf = true;
6215         break;
6216       case FPPositiveInfinity:
6217         overflow_to_inf = (sign == 0);
6218         break;
6219       case FPNegativeInfinity:
6220         overflow_to_inf = (sign == 1);
6221         break;
6222       case FPZero:
6223         overflow_to_inf = false;
6224         break;
6225       default:
6226         break;
6227     }
6228     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6229     if (overflow_to_inf) {
6230       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6231     } else {
6232       // Return FPMaxNormal(sign).
6233       if (IsFloat16<T>()) {
6234         return Float16Pack(sign, 0x1f, 0x3ff);
6235       } else if (IsFloat32<T>()) {
6236         return FloatPack(sign, 0xfe, 0x07fffff);
6237       } else {
6238         VIXL_ASSERT(IsFloat64<T>());
6239         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6240       }
6241     }
6242   } else {
6243     uint64_t fraction;
6244     int exp, result_exp;
6245 
6246     if (IsFloat16<T>()) {
6247       sign = Float16Sign(op);
6248       exp = Float16Exp(op);
6249       fraction = Float16Mantissa(op);
6250       fraction <<= 42;
6251     } else if (IsFloat32<T>()) {
6252       sign = FloatSign(op);
6253       exp = FloatExp(op);
6254       fraction = FloatMantissa(op);
6255       fraction <<= 29;
6256     } else {
6257       VIXL_ASSERT(IsFloat64<T>());
6258       sign = DoubleSign(op);
6259       exp = DoubleExp(op);
6260       fraction = DoubleMantissa(op);
6261     }
6262 
6263     if (exp == 0) {
6264       if (Bits(fraction, 51, 51) == 0) {
6265         exp -= 1;
6266         fraction = Bits(fraction, 49, 0) << 2;
6267       } else {
6268         fraction = Bits(fraction, 50, 0) << 1;
6269       }
6270     }
6271 
6272     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6273 
6274     if (IsFloat16<T>()) {
6275       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6276     } else if (IsFloat32<T>()) {
6277       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6278     } else {
6279       VIXL_ASSERT(IsFloat64<T>());
6280       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6281     }
6282 
6283     double estimate = recip_estimate(scaled);
6284 
6285     fraction = DoubleMantissa(estimate);
6286     if (result_exp == 0) {
6287       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6288     } else if (result_exp == -1) {
6289       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6290       result_exp = 0;
6291     }
6292     if (IsFloat16<T>()) {
6293       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6294       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6295       return Float16Pack(sign, exp_bits, frac_bits);
6296     } else if (IsFloat32<T>()) {
6297       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6298       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6299       return FloatPack(sign, exp_bits, frac_bits);
6300     } else {
6301       VIXL_ASSERT(IsFloat64<T>());
6302       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6303     }
6304   }
6305 }
6306 
6307 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6308 LogicVRegister Simulator::frecpe(VectorFormat vform,
6309                                  LogicVRegister dst,
6310                                  const LogicVRegister& src,
6311                                  FPRounding round) {
6312   dst.ClearForWrite(vform);
6313   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6314     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6315       SimFloat16 input = src.Float<SimFloat16>(i);
6316       dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6317     }
6318   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6319     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6320       float input = src.Float<float>(i);
6321       dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6322     }
6323   } else {
6324     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6325     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6326       double input = src.Float<double>(i);
6327       dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6328     }
6329   }
6330   return dst;
6331 }
6332 
6333 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6334 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6335                                   LogicVRegister dst,
6336                                   const LogicVRegister& src) {
6337   dst.ClearForWrite(vform);
6338   uint64_t operand;
6339   uint32_t result;
6340   double dp_operand, dp_result;
6341   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6342     operand = src.Uint(vform, i);
6343     if (operand <= 0x3FFFFFFF) {
6344       result = 0xFFFFFFFF;
6345     } else {
6346       dp_operand = operand * std::pow(2.0, -32);
6347       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6348       result = static_cast<uint32_t>(dp_result);
6349     }
6350     dst.SetUint(vform, i, result);
6351   }
6352   return dst;
6353 }
6354 
6355 
6356 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6357 double Simulator::recip_estimate(double a) {
6358   int q, s;
6359   double r;
6360   q = static_cast<int>(a * 512.0);
6361   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6362   s = static_cast<int>(256.0 * r + 0.5);
6363   return static_cast<double>(s) / 256.0;
6364 }
6365 
6366 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6367 LogicVRegister Simulator::urecpe(VectorFormat vform,
6368                                  LogicVRegister dst,
6369                                  const LogicVRegister& src) {
6370   dst.ClearForWrite(vform);
6371   uint64_t operand;
6372   uint32_t result;
6373   double dp_operand, dp_result;
6374   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6375     operand = src.Uint(vform, i);
6376     if (operand <= 0x7FFFFFFF) {
6377       result = 0xFFFFFFFF;
6378     } else {
6379       dp_operand = operand * std::pow(2.0, -32);
6380       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6381       result = static_cast<uint32_t>(dp_result);
6382     }
6383     dst.SetUint(vform, i, result);
6384   }
6385   return dst;
6386 }
6387 
pfalse(LogicPRegister dst)6388 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6389   dst.Clear();
6390   return dst;
6391 }
6392 
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6393 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6394                                  const LogicPRegister& pg,
6395                                  const LogicPRegister& src) {
6396   int first_pg = GetFirstActive(kFormatVnB, pg);
6397   VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6398   mov(dst, src);
6399   if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6400   return dst;
6401 }
6402 
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6403 LogicPRegister Simulator::ptrue(VectorFormat vform,
6404                                 LogicPRegister dst,
6405                                 int pattern) {
6406   int count = GetPredicateConstraintLaneCount(vform, pattern);
6407   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6408     dst.SetActive(vform, i, i < count);
6409   }
6410   return dst;
6411 }
6412 
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6413 LogicPRegister Simulator::pnext(VectorFormat vform,
6414                                 LogicPRegister dst,
6415                                 const LogicPRegister& pg,
6416                                 const LogicPRegister& src) {
6417   int next = GetLastActive(vform, src) + 1;
6418   while (next < LaneCountFromFormat(vform)) {
6419     if (pg.IsActive(vform, next)) break;
6420     next++;
6421   }
6422 
6423   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6424     dst.SetActive(vform, i, (i == next));
6425   }
6426   return dst;
6427 }
6428 
6429 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6430 LogicVRegister Simulator::frecpx(VectorFormat vform,
6431                                  LogicVRegister dst,
6432                                  const LogicVRegister& src) {
6433   dst.ClearForWrite(vform);
6434   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6435     T op = src.Float<T>(i);
6436     T result;
6437     if (IsNaN(op)) {
6438       result = FPProcessNaN(op);
6439     } else {
6440       int exp;
6441       uint32_t sign;
6442       if (IsFloat16<T>()) {
6443         sign = Float16Sign(op);
6444         exp = Float16Exp(op);
6445         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6446         result = Float16Pack(sign, exp, 0);
6447       } else if (IsFloat32<T>()) {
6448         sign = FloatSign(op);
6449         exp = FloatExp(op);
6450         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6451         result = FloatPack(sign, exp, 0);
6452       } else {
6453         VIXL_ASSERT(IsFloat64<T>());
6454         sign = DoubleSign(op);
6455         exp = DoubleExp(op);
6456         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6457         result = DoublePack(sign, exp, 0);
6458       }
6459     }
6460     dst.SetFloat(i, result);
6461   }
6462   return dst;
6463 }
6464 
6465 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6466 LogicVRegister Simulator::frecpx(VectorFormat vform,
6467                                  LogicVRegister dst,
6468                                  const LogicVRegister& src) {
6469   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6470     frecpx<SimFloat16>(vform, dst, src);
6471   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6472     frecpx<float>(vform, dst, src);
6473   } else {
6474     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6475     frecpx<double>(vform, dst, src);
6476   }
6477   return dst;
6478 }
6479 
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6480 LogicVRegister Simulator::flogb(VectorFormat vform,
6481                                 LogicVRegister dst,
6482                                 const LogicVRegister& src) {
6483   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6484     double op = 0.0;
6485     switch (vform) {
6486       case kFormatVnH:
6487         op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6488         break;
6489       case kFormatVnS:
6490         op = src.Float<float>(i);
6491         break;
6492       case kFormatVnD:
6493         op = src.Float<double>(i);
6494         break;
6495       default:
6496         VIXL_UNREACHABLE();
6497     }
6498 
6499     switch (std::fpclassify(op)) {
6500       case FP_INFINITE:
6501         dst.SetInt(vform, i, MaxIntFromFormat(vform));
6502         break;
6503       case FP_NAN:
6504       case FP_ZERO:
6505         dst.SetInt(vform, i, MinIntFromFormat(vform));
6506         break;
6507       case FP_SUBNORMAL: {
6508         // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6509         // bits where the sign and exponent would be. We subtract 12 to
6510         // find the number of leading zero bits in the mantissa itself.
6511         int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6512         // Log2 of a subnormal is the lowest exponent a normal number can
6513         // represent, together with the zeros in the mantissa.
6514         dst.SetInt(vform, i, -1023 - mant_zero_count);
6515         break;
6516       }
6517       case FP_NORMAL:
6518         // Log2 of a normal number is the exponent minus the bias.
6519         dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6520         break;
6521     }
6522   }
6523   return dst;
6524 }
6525 
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6526 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6527                                  LogicVRegister dst,
6528                                  const LogicVRegister& src1,
6529                                  const LogicVRegister& src2) {
6530   SimVRegister maybe_neg_src1;
6531 
6532   // The bottom bit of src2 controls the sign of the result. Use it to
6533   // conditionally invert the sign of one `fmul` operand.
6534   shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6535   eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6536 
6537   // Multiply src1 by the modified neg_src1, which is potentially its negation.
6538   // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6539   // rather than neg_src1, must be the first source argument.
6540   fmul(vform, dst, src1, maybe_neg_src1);
6541 
6542   return dst;
6543 }
6544 
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6545 LogicVRegister Simulator::ftssel(VectorFormat vform,
6546                                  LogicVRegister dst,
6547                                  const LogicVRegister& src1,
6548                                  const LogicVRegister& src2) {
6549   unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6550   uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6551   uint64_t one;
6552 
6553   if (lane_bits == kHRegSize) {
6554     one = Float16ToRawbits(Float16(1.0));
6555   } else if (lane_bits == kSRegSize) {
6556     one = FloatToRawbits(1.0);
6557   } else {
6558     VIXL_ASSERT(lane_bits == kDRegSize);
6559     one = DoubleToRawbits(1.0);
6560   }
6561 
6562   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6563     // Use integer accessors for this operation, as this is a data manipulation
6564     // task requiring no calculation.
6565     uint64_t op = src1.Uint(vform, i);
6566 
6567     // Only the bottom two bits of the src2 register are significant, indicating
6568     // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6569     // determines the sign of the value written to dst.
6570     uint64_t q = src2.Uint(vform, i);
6571     if ((q & 1) == 1) op = one;
6572     if ((q & 2) == 2) op ^= sign_bit;
6573 
6574     dst.SetUint(vform, i, op);
6575   }
6576 
6577   return dst;
6578 }
6579 
6580 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6581 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6582                                        LogicVRegister dst,
6583                                        const LogicVRegister& src1,
6584                                        const LogicVRegister& src2,
6585                                        uint64_t coeff_pos,
6586                                        uint64_t coeff_neg) {
6587   SimVRegister zero;
6588   dup_immediate(kFormatVnB, zero, 0);
6589 
6590   SimVRegister cf;
6591   SimVRegister cfn;
6592   dup_immediate(vform, cf, coeff_pos);
6593   dup_immediate(vform, cfn, coeff_neg);
6594 
6595   // The specification requires testing the top bit of the raw value, rather
6596   // than the sign of the floating point number, so use an integer comparison
6597   // here.
6598   SimPRegister is_neg;
6599   SVEIntCompareVectorsHelper(lt,
6600                              vform,
6601                              is_neg,
6602                              GetPTrue(),
6603                              src2,
6604                              zero,
6605                              false,
6606                              LeaveFlags);
6607   mov_merging(vform, cf, is_neg, cfn);
6608 
6609   SimVRegister temp;
6610   fabs_<T>(vform, temp, src2);
6611   fmla<T>(vform, cf, cf, src1, temp);
6612   mov(vform, dst, cf);
6613   return dst;
6614 }
6615 
6616 
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6617 LogicVRegister Simulator::ftmad(VectorFormat vform,
6618                                 LogicVRegister dst,
6619                                 const LogicVRegister& src1,
6620                                 const LogicVRegister& src2,
6621                                 unsigned index) {
6622   static const uint64_t ftmad_coeff16[] = {0x3c00,
6623                                            0xb155,
6624                                            0x2030,
6625                                            0x0000,
6626                                            0x0000,
6627                                            0x0000,
6628                                            0x0000,
6629                                            0x0000,
6630                                            0x3c00,
6631                                            0xb800,
6632                                            0x293a,
6633                                            0x0000,
6634                                            0x0000,
6635                                            0x0000,
6636                                            0x0000,
6637                                            0x0000};
6638 
6639   static const uint64_t ftmad_coeff32[] = {0x3f800000,
6640                                            0xbe2aaaab,
6641                                            0x3c088886,
6642                                            0xb95008b9,
6643                                            0x36369d6d,
6644                                            0x00000000,
6645                                            0x00000000,
6646                                            0x00000000,
6647                                            0x3f800000,
6648                                            0xbf000000,
6649                                            0x3d2aaaa6,
6650                                            0xbab60705,
6651                                            0x37cd37cc,
6652                                            0x00000000,
6653                                            0x00000000,
6654                                            0x00000000};
6655 
6656   static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6657                                            0xbfc5555555555543,
6658                                            0x3f8111111110f30c,
6659                                            0xbf2a01a019b92fc6,
6660                                            0x3ec71de351f3d22b,
6661                                            0xbe5ae5e2b60f7b91,
6662                                            0x3de5d8408868552f,
6663                                            0x0000000000000000,
6664                                            0x3ff0000000000000,
6665                                            0xbfe0000000000000,
6666                                            0x3fa5555555555536,
6667                                            0xbf56c16c16c13a0b,
6668                                            0x3efa01a019b1e8d8,
6669                                            0xbe927e4f7282f468,
6670                                            0x3e21ee96d2641b13,
6671                                            0xbda8f76380fbb401};
6672   VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6673   VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6674   VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6675 
6676   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6677     FTMaddHelper<SimFloat16>(vform,
6678                              dst,
6679                              src1,
6680                              src2,
6681                              ftmad_coeff16[index],
6682                              ftmad_coeff16[index + 8]);
6683   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6684     FTMaddHelper<float>(vform,
6685                         dst,
6686                         src1,
6687                         src2,
6688                         ftmad_coeff32[index],
6689                         ftmad_coeff32[index + 8]);
6690   } else {
6691     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6692     FTMaddHelper<double>(vform,
6693                          dst,
6694                          src1,
6695                          src2,
6696                          ftmad_coeff64[index],
6697                          ftmad_coeff64[index + 8]);
6698   }
6699   return dst;
6700 }
6701 
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6702 LogicVRegister Simulator::fexpa(VectorFormat vform,
6703                                 LogicVRegister dst,
6704                                 const LogicVRegister& src) {
6705   static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6706                                            0x005d, 0x0075, 0x008e, 0x00a8,
6707                                            0x00c2, 0x00dc, 0x00f8, 0x0114,
6708                                            0x0130, 0x014d, 0x016b, 0x0189,
6709                                            0x01a8, 0x01c8, 0x01e8, 0x0209,
6710                                            0x022b, 0x024e, 0x0271, 0x0295,
6711                                            0x02ba, 0x02e0, 0x0306, 0x032e,
6712                                            0x0356, 0x037f, 0x03a9, 0x03d4};
6713 
6714   static const uint64_t fexpa_coeff32[] =
6715       {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6716        0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6717        0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6718        0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6719        0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6720        0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6721        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6722        0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6723        0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6724        0x7d3e0c};
6725 
6726   static const uint64_t fexpa_coeff64[] =
6727       {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6728        0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6729        0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6730        0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6731        0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6732        0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6733        0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6734        0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6735        0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6736        0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6737        0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6738        0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6739        0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6740        0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6741        0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6742        0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6743 
6744   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6745   int index_highbit = 5;
6746   int op_highbit, op_shift;
6747   const uint64_t* fexpa_coeff;
6748 
6749   if (lane_size == kHRegSize) {
6750     index_highbit = 4;
6751     VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6752     fexpa_coeff = fexpa_coeff16;
6753     op_highbit = 9;
6754     op_shift = 10;
6755   } else if (lane_size == kSRegSize) {
6756     VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6757     fexpa_coeff = fexpa_coeff32;
6758     op_highbit = 13;
6759     op_shift = 23;
6760   } else {
6761     VIXL_ASSERT(lane_size == kDRegSize);
6762     VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6763     fexpa_coeff = fexpa_coeff64;
6764     op_highbit = 16;
6765     op_shift = 52;
6766   }
6767 
6768   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6769     uint64_t op = src.Uint(vform, i);
6770     uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6771     result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6772     dst.SetUint(vform, i, result);
6773   }
6774   return dst;
6775 }
6776 
6777 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6778 LogicVRegister Simulator::fscale(VectorFormat vform,
6779                                  LogicVRegister dst,
6780                                  const LogicVRegister& src1,
6781                                  const LogicVRegister& src2) {
6782   T two = T(2.0);
6783   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6784     T src1_val = src1.Float<T>(i);
6785     if (!IsNaN(src1_val)) {
6786       int64_t scale = src2.Int(vform, i);
6787       // TODO: this is a low-performance implementation, but it's simple and
6788       // less likely to be buggy. Consider replacing it with something faster.
6789 
6790       // Scales outside of these bounds become infinity or zero, so there's no
6791       // point iterating further.
6792       scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6793 
6794       // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6795       // decrement scale until it's zero.
6796       while (scale-- > 0) {
6797         src1_val = FPMul(src1_val, two);
6798       }
6799 
6800       // If scale is negative, divide by two and increment scale until it's
6801       // zero. Initially, scale is (src2 - 1), so we pre-increment.
6802       while (++scale < 0) {
6803         src1_val = FPDiv(src1_val, two);
6804       }
6805     }
6806     dst.SetFloat<T>(i, src1_val);
6807   }
6808   return dst;
6809 }
6810 
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6811 LogicVRegister Simulator::fscale(VectorFormat vform,
6812                                  LogicVRegister dst,
6813                                  const LogicVRegister& src1,
6814                                  const LogicVRegister& src2) {
6815   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6816     fscale<SimFloat16>(vform, dst, src1, src2);
6817   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6818     fscale<float>(vform, dst, src1, src2);
6819   } else {
6820     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6821     fscale<double>(vform, dst, src1, src2);
6822   }
6823   return dst;
6824 }
6825 
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6826 LogicVRegister Simulator::scvtf(VectorFormat vform,
6827                                 unsigned dst_data_size_in_bits,
6828                                 unsigned src_data_size_in_bits,
6829                                 LogicVRegister dst,
6830                                 const LogicPRegister& pg,
6831                                 const LogicVRegister& src,
6832                                 FPRounding round,
6833                                 int fbits) {
6834   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6835   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6836   dst.ClearForWrite(vform);
6837 
6838   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6839     if (!pg.IsActive(vform, i)) continue;
6840 
6841     int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6842                                             0,
6843                                             src.Uint(vform, i));
6844 
6845     switch (dst_data_size_in_bits) {
6846       case kHRegSize: {
6847         SimFloat16 result = FixedToFloat16(value, fbits, round);
6848         dst.SetUint(vform, i, Float16ToRawbits(result));
6849         break;
6850       }
6851       case kSRegSize: {
6852         float result = FixedToFloat(value, fbits, round);
6853         dst.SetUint(vform, i, FloatToRawbits(result));
6854         break;
6855       }
6856       case kDRegSize: {
6857         double result = FixedToDouble(value, fbits, round);
6858         dst.SetUint(vform, i, DoubleToRawbits(result));
6859         break;
6860       }
6861       default:
6862         VIXL_UNIMPLEMENTED();
6863         break;
6864     }
6865   }
6866 
6867   return dst;
6868 }
6869 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6870 LogicVRegister Simulator::scvtf(VectorFormat vform,
6871                                 LogicVRegister dst,
6872                                 const LogicVRegister& src,
6873                                 int fbits,
6874                                 FPRounding round) {
6875   return scvtf(vform,
6876                LaneSizeInBitsFromFormat(vform),
6877                LaneSizeInBitsFromFormat(vform),
6878                dst,
6879                GetPTrue(),
6880                src,
6881                round,
6882                fbits);
6883 }
6884 
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6885 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6886                                 unsigned dst_data_size_in_bits,
6887                                 unsigned src_data_size_in_bits,
6888                                 LogicVRegister dst,
6889                                 const LogicPRegister& pg,
6890                                 const LogicVRegister& src,
6891                                 FPRounding round,
6892                                 int fbits) {
6893   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6894   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6895   dst.ClearForWrite(vform);
6896 
6897   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6898     if (!pg.IsActive(vform, i)) continue;
6899 
6900     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6901                                                0,
6902                                                src.Uint(vform, i));
6903 
6904     switch (dst_data_size_in_bits) {
6905       case kHRegSize: {
6906         SimFloat16 result = UFixedToFloat16(value, fbits, round);
6907         dst.SetUint(vform, i, Float16ToRawbits(result));
6908         break;
6909       }
6910       case kSRegSize: {
6911         float result = UFixedToFloat(value, fbits, round);
6912         dst.SetUint(vform, i, FloatToRawbits(result));
6913         break;
6914       }
6915       case kDRegSize: {
6916         double result = UFixedToDouble(value, fbits, round);
6917         dst.SetUint(vform, i, DoubleToRawbits(result));
6918         break;
6919       }
6920       default:
6921         VIXL_UNIMPLEMENTED();
6922         break;
6923     }
6924   }
6925 
6926   return dst;
6927 }
6928 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6929 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6930                                 LogicVRegister dst,
6931                                 const LogicVRegister& src,
6932                                 int fbits,
6933                                 FPRounding round) {
6934   return ucvtf(vform,
6935                LaneSizeInBitsFromFormat(vform),
6936                LaneSizeInBitsFromFormat(vform),
6937                dst,
6938                GetPTrue(),
6939                src,
6940                round,
6941                fbits);
6942 }
6943 
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6944 LogicVRegister Simulator::unpk(VectorFormat vform,
6945                                LogicVRegister dst,
6946                                const LogicVRegister& src,
6947                                UnpackType unpack_type,
6948                                ExtendType extend_type) {
6949   VectorFormat vform_half = VectorFormatHalfWidth(vform);
6950   const int lane_count = LaneCountFromFormat(vform);
6951   const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6952 
6953   switch (extend_type) {
6954     case kSignedExtend: {
6955       int64_t result[kZRegMaxSizeInBytes];
6956       for (int i = 0; i < lane_count; ++i) {
6957         result[i] = src.Int(vform_half, i + src_start_lane);
6958       }
6959       for (int i = 0; i < lane_count; ++i) {
6960         dst.SetInt(vform, i, result[i]);
6961       }
6962       break;
6963     }
6964     case kUnsignedExtend: {
6965       uint64_t result[kZRegMaxSizeInBytes];
6966       for (int i = 0; i < lane_count; ++i) {
6967         result[i] = src.Uint(vform_half, i + src_start_lane);
6968       }
6969       for (int i = 0; i < lane_count; ++i) {
6970         dst.SetUint(vform, i, result[i]);
6971       }
6972       break;
6973     }
6974     default:
6975       VIXL_UNREACHABLE();
6976   }
6977   return dst;
6978 }
6979 
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6980 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6981                                                      VectorFormat vform,
6982                                                      LogicPRegister dst,
6983                                                      const LogicPRegister& mask,
6984                                                      const LogicVRegister& src1,
6985                                                      const LogicVRegister& src2,
6986                                                      bool is_wide_elements,
6987                                                      FlagsUpdate flags) {
6988   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6989     bool result = false;
6990     if (mask.IsActive(vform, lane)) {
6991       int64_t op1 = 0xbadbeef;
6992       int64_t op2 = 0xbadbeef;
6993       int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6994       switch (cond) {
6995         case eq:
6996         case ge:
6997         case gt:
6998         case lt:
6999         case le:
7000         case ne:
7001           op1 = src1.Int(vform, lane);
7002           op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7003                                  : src2.Int(vform, lane);
7004           break;
7005         case hi:
7006         case hs:
7007         case ls:
7008         case lo:
7009           op1 = src1.Uint(vform, lane);
7010           op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7011                                  : src2.Uint(vform, lane);
7012           break;
7013         default:
7014           VIXL_UNREACHABLE();
7015       }
7016 
7017       switch (cond) {
7018         case eq:
7019           result = (op1 == op2);
7020           break;
7021         case ne:
7022           result = (op1 != op2);
7023           break;
7024         case ge:
7025           result = (op1 >= op2);
7026           break;
7027         case gt:
7028           result = (op1 > op2);
7029           break;
7030         case le:
7031           result = (op1 <= op2);
7032           break;
7033         case lt:
7034           result = (op1 < op2);
7035           break;
7036         case hs:
7037           result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7038           break;
7039         case hi:
7040           result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7041           break;
7042         case ls:
7043           result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7044           break;
7045         case lo:
7046           result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7047           break;
7048         default:
7049           VIXL_UNREACHABLE();
7050       }
7051     }
7052     dst.SetActive(vform, lane, result);
7053   }
7054 
7055   if (flags == SetFlags) PredTest(vform, mask, dst);
7056 
7057   return dst;
7058 }
7059 
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7060 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7061                                                 VectorFormat vform,
7062                                                 LogicVRegister dst,
7063                                                 const LogicVRegister& src1,
7064                                                 const LogicVRegister& src2,
7065                                                 bool is_wide_elements) {
7066   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7067   VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7068 
7069   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7070     int shift_src_lane = lane;
7071     if (is_wide_elements) {
7072       // If the shift amount comes from wide elements, select the D-sized lane
7073       // which occupies the corresponding lanes of the value to be shifted.
7074       shift_src_lane = (lane * lane_size) / kDRegSize;
7075     }
7076     uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7077 
7078     // Saturate shift_amount to the size of the lane that will be shifted.
7079     if (shift_amount > lane_size) shift_amount = lane_size;
7080 
7081     uint64_t value = src1.Uint(vform, lane);
7082     int64_t result = ShiftOperand(lane_size,
7083                                   value,
7084                                   shift_op,
7085                                   static_cast<unsigned>(shift_amount));
7086     dst.SetUint(vform, lane, result);
7087   }
7088 
7089   return dst;
7090 }
7091 
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7092 LogicVRegister Simulator::asrd(VectorFormat vform,
7093                                LogicVRegister dst,
7094                                const LogicVRegister& src1,
7095                                int shift) {
7096   VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7097                               LaneSizeInBitsFromFormat(vform)));
7098 
7099   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7100     int64_t value = src1.Int(vform, i);
7101     if (shift <= 63) {
7102       if (value < 0) {
7103         // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7104         // cast to int64_t, and cannot cause signed overflow in the result.
7105         value = value + GetUintMask(shift);
7106       }
7107       value = ShiftOperand(kDRegSize, value, ASR, shift);
7108     } else {
7109       value = 0;
7110     }
7111     dst.SetInt(vform, i, value);
7112   }
7113   return dst;
7114 }
7115 
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7116 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7117     LogicalOp logical_op,
7118     VectorFormat vform,
7119     LogicVRegister zd,
7120     const LogicVRegister& zn,
7121     const LogicVRegister& zm) {
7122   VIXL_ASSERT(IsSVEFormat(vform));
7123   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7124     uint64_t op1 = zn.Uint(vform, i);
7125     uint64_t op2 = zm.Uint(vform, i);
7126     uint64_t result = 0;
7127     switch (logical_op) {
7128       case AND:
7129         result = op1 & op2;
7130         break;
7131       case BIC:
7132         result = op1 & ~op2;
7133         break;
7134       case EOR:
7135         result = op1 ^ op2;
7136         break;
7137       case ORR:
7138         result = op1 | op2;
7139         break;
7140       default:
7141         VIXL_UNIMPLEMENTED();
7142     }
7143     zd.SetUint(vform, i, result);
7144   }
7145 
7146   return zd;
7147 }
7148 
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7149 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7150                                                     LogicPRegister pd,
7151                                                     const LogicPRegister& pn,
7152                                                     const LogicPRegister& pm) {
7153   for (int i = 0; i < pn.GetChunkCount(); i++) {
7154     LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7155     LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7156     LogicPRegister::ChunkType result = 0;
7157     switch (op) {
7158       case ANDS_p_p_pp_z:
7159       case AND_p_p_pp_z:
7160         result = op1 & op2;
7161         break;
7162       case BICS_p_p_pp_z:
7163       case BIC_p_p_pp_z:
7164         result = op1 & ~op2;
7165         break;
7166       case EORS_p_p_pp_z:
7167       case EOR_p_p_pp_z:
7168         result = op1 ^ op2;
7169         break;
7170       case NANDS_p_p_pp_z:
7171       case NAND_p_p_pp_z:
7172         result = ~(op1 & op2);
7173         break;
7174       case NORS_p_p_pp_z:
7175       case NOR_p_p_pp_z:
7176         result = ~(op1 | op2);
7177         break;
7178       case ORNS_p_p_pp_z:
7179       case ORN_p_p_pp_z:
7180         result = op1 | ~op2;
7181         break;
7182       case ORRS_p_p_pp_z:
7183       case ORR_p_p_pp_z:
7184         result = op1 | op2;
7185         break;
7186       default:
7187         VIXL_UNIMPLEMENTED();
7188     }
7189     pd.SetChunk(i, result);
7190   }
7191   return pd;
7192 }
7193 
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7194 LogicVRegister Simulator::SVEBitwiseImmHelper(
7195     SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7196     VectorFormat vform,
7197     LogicVRegister zd,
7198     uint64_t imm) {
7199   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7200     uint64_t op1 = zd.Uint(vform, i);
7201     uint64_t result = 0;
7202     switch (op) {
7203       case AND_z_zi:
7204         result = op1 & imm;
7205         break;
7206       case EOR_z_zi:
7207         result = op1 ^ imm;
7208         break;
7209       case ORR_z_zi:
7210         result = op1 | imm;
7211         break;
7212       default:
7213         VIXL_UNIMPLEMENTED();
7214     }
7215     zd.SetUint(vform, i, result);
7216   }
7217 
7218   return zd;
7219 }
7220 
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7221 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7222                                          const LogicPRegister& pg,
7223                                          unsigned zt_code,
7224                                          const LogicSVEAddressVector& addr) {
7225   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7226 
7227   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7228   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7229   int msize_in_bytes = addr.GetMsizeInBytes();
7230   int reg_count = addr.GetRegCount();
7231 
7232   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7233   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7234 
7235   unsigned zt_codes[4] = {zt_code,
7236                           (zt_code + 1) % kNumberOfZRegisters,
7237                           (zt_code + 2) % kNumberOfZRegisters,
7238                           (zt_code + 3) % kNumberOfZRegisters};
7239 
7240   LogicVRegister zt[4] = {
7241       ReadVRegister(zt_codes[0]),
7242       ReadVRegister(zt_codes[1]),
7243       ReadVRegister(zt_codes[2]),
7244       ReadVRegister(zt_codes[3]),
7245   };
7246 
7247   // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7248   // are ignored, so read the source register using the VectorFormat that
7249   // corresponds with the storage format, and multiply the index accordingly.
7250   VectorFormat unpack_vform =
7251       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7252   int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7253 
7254   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7255     if (!pg.IsActive(vform, i)) continue;
7256 
7257     for (int r = 0; r < reg_count; r++) {
7258       uint64_t element_address = addr.GetElementAddress(i, r);
7259       StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7260     }
7261   }
7262 
7263   if (ShouldTraceWrites()) {
7264     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7265     if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7266       // Use an FP format where it's likely that we're accessing FP data.
7267       format = GetPrintRegisterFormatTryFP(format);
7268     }
7269     // Stores don't represent a change to the source register's value, so only
7270     // print the relevant part of the value.
7271     format = GetPrintRegPartial(format);
7272 
7273     PrintZStructAccess(zt_code,
7274                        reg_count,
7275                        pg,
7276                        format,
7277                        msize_in_bytes,
7278                        "->",
7279                        addr);
7280   }
7281 }
7282 
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7283 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7284                                         const LogicPRegister& pg,
7285                                         unsigned zt_code,
7286                                         const LogicSVEAddressVector& addr,
7287                                         bool is_signed) {
7288   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7289   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7290   int msize_in_bytes = addr.GetMsizeInBytes();
7291   int reg_count = addr.GetRegCount();
7292 
7293   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7294   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7295   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7296 
7297   unsigned zt_codes[4] = {zt_code,
7298                           (zt_code + 1) % kNumberOfZRegisters,
7299                           (zt_code + 2) % kNumberOfZRegisters,
7300                           (zt_code + 3) % kNumberOfZRegisters};
7301   LogicVRegister zt[4] = {
7302       ReadVRegister(zt_codes[0]),
7303       ReadVRegister(zt_codes[1]),
7304       ReadVRegister(zt_codes[2]),
7305       ReadVRegister(zt_codes[3]),
7306   };
7307 
7308   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7309     for (int r = 0; r < reg_count; r++) {
7310       uint64_t element_address = addr.GetElementAddress(i, r);
7311 
7312       if (!pg.IsActive(vform, i)) {
7313         zt[r].SetUint(vform, i, 0);
7314         continue;
7315       }
7316 
7317       if (is_signed) {
7318         LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7319       } else {
7320         LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7321       }
7322     }
7323   }
7324 
7325   if (ShouldTraceVRegs()) {
7326     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7327     if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7328       // Use an FP format where it's likely that we're accessing FP data.
7329       format = GetPrintRegisterFormatTryFP(format);
7330     }
7331     PrintZStructAccess(zt_code,
7332                        reg_count,
7333                        pg,
7334                        format,
7335                        msize_in_bytes,
7336                        "<-",
7337                        addr);
7338   }
7339 }
7340 
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7341 LogicPRegister Simulator::brka(LogicPRegister pd,
7342                                const LogicPRegister& pg,
7343                                const LogicPRegister& pn) {
7344   bool break_ = false;
7345   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7346     if (pg.IsActive(kFormatVnB, i)) {
7347       pd.SetActive(kFormatVnB, i, !break_);
7348       break_ |= pn.IsActive(kFormatVnB, i);
7349     }
7350   }
7351 
7352   return pd;
7353 }
7354 
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7355 LogicPRegister Simulator::brkb(LogicPRegister pd,
7356                                const LogicPRegister& pg,
7357                                const LogicPRegister& pn) {
7358   bool break_ = false;
7359   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7360     if (pg.IsActive(kFormatVnB, i)) {
7361       break_ |= pn.IsActive(kFormatVnB, i);
7362       pd.SetActive(kFormatVnB, i, !break_);
7363     }
7364   }
7365 
7366   return pd;
7367 }
7368 
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7369 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7370                                const LogicPRegister& pg,
7371                                const LogicPRegister& pn) {
7372   if (!IsLastActive(kFormatVnB, pg, pn)) {
7373     pfalse(pdm);
7374   }
7375   return pdm;
7376 }
7377 
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7378 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7379                                 const LogicPRegister& pg,
7380                                 const LogicPRegister& pn,
7381                                 const LogicPRegister& pm) {
7382   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7383 
7384   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7385     bool active = false;
7386     if (pg.IsActive(kFormatVnB, i)) {
7387       active = last_active;
7388       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7389     }
7390     pd.SetActive(kFormatVnB, i, active);
7391   }
7392 
7393   return pd;
7394 }
7395 
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7396 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7397                                 const LogicPRegister& pg,
7398                                 const LogicPRegister& pn,
7399                                 const LogicPRegister& pm) {
7400   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7401 
7402   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7403     bool active = false;
7404     if (pg.IsActive(kFormatVnB, i)) {
7405       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7406       active = last_active;
7407     }
7408     pd.SetActive(kFormatVnB, i, active);
7409   }
7410 
7411   return pd;
7412 }
7413 
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7414 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7415                                            const LogicPRegister& pg,
7416                                            unsigned zt_code,
7417                                            const LogicSVEAddressVector& addr,
7418                                            SVEFaultTolerantLoadType type,
7419                                            bool is_signed) {
7420   int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7421   int msize_in_bits = addr.GetMsizeInBits();
7422   int msize_in_bytes = addr.GetMsizeInBytes();
7423 
7424   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7425   VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7426   VIXL_ASSERT(addr.GetRegCount() == 1);
7427 
7428   LogicVRegister zt = ReadVRegister(zt_code);
7429   LogicPRegister ffr = ReadFFR();
7430 
7431   // Non-faulting loads are allowed to fail arbitrarily. To stress user
7432   // code, fail a random element in roughly one in eight full-vector loads.
7433   uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7434   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7435 
7436   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7437     uint64_t value = 0;
7438 
7439     if (pg.IsActive(vform, i)) {
7440       uint64_t element_address = addr.GetElementAddress(i, 0);
7441 
7442       if (type == kSVEFirstFaultLoad) {
7443         // First-faulting loads always load the first active element, regardless
7444         // of FFR. The result will be discarded if its FFR lane is inactive, but
7445         // it could still generate a fault.
7446         value = MemReadUint(msize_in_bytes, element_address);
7447         // All subsequent elements have non-fault semantics.
7448         type = kSVENonFaultLoad;
7449 
7450       } else if (ffr.IsActive(vform, i)) {
7451         // Simulation of fault-tolerant loads relies on system calls, and is
7452         // likely to be relatively slow, so we only actually perform the load if
7453         // its FFR lane is active.
7454 
7455         bool can_read = (i < fake_fault_at_lane) &&
7456                         CanReadMemory(element_address, msize_in_bytes);
7457         if (can_read) {
7458           value = MemReadUint(msize_in_bytes, element_address);
7459         } else {
7460           // Propagate the fault to the end of FFR.
7461           for (int j = i; j < LaneCountFromFormat(vform); j++) {
7462             ffr.SetActive(vform, j, false);
7463           }
7464         }
7465       }
7466     }
7467 
7468     // The architecture permits a few possible results for inactive FFR lanes
7469     // (including those caused by a fault in this instruction). We choose to
7470     // leave the register value unchanged (like merging predication) because
7471     // no other input to this instruction can have the same behaviour.
7472     //
7473     // Note that this behaviour takes precedence over pg's zeroing predication.
7474 
7475     if (ffr.IsActive(vform, i)) {
7476       int msb = msize_in_bits - 1;
7477       if (is_signed) {
7478         zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7479       } else {
7480         zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7481       }
7482     }
7483   }
7484 
7485   if (ShouldTraceVRegs()) {
7486     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7487     if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7488       // Use an FP format where it's likely that we're accessing FP data.
7489       format = GetPrintRegisterFormatTryFP(format);
7490     }
7491     // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7492     // expects a single mask, so combine the two predicates.
7493     SimPRegister mask;
7494     SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7495     PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7496   }
7497 }
7498 
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7499 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7500                                                     VectorFormat vform,
7501                                                     SVEOffsetModifier mod) {
7502   bool is_signed = instr->ExtractBit(14) == 0;
7503   bool is_ff = instr->ExtractBit(13) == 1;
7504   // Note that these instructions don't use the Dtype encoding.
7505   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7506   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7507   uint64_t base = ReadXRegister(instr->GetRn());
7508   LogicSVEAddressVector addr(base,
7509                              &ReadVRegister(instr->GetRm()),
7510                              vform,
7511                              mod,
7512                              scale);
7513   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7514   if (is_ff) {
7515     SVEFaultTolerantLoadHelper(vform,
7516                                ReadPRegister(instr->GetPgLow8()),
7517                                instr->GetRt(),
7518                                addr,
7519                                kSVEFirstFaultLoad,
7520                                is_signed);
7521   } else {
7522     SVEStructuredLoadHelper(vform,
7523                             ReadPRegister(instr->GetPgLow8()),
7524                             instr->GetRt(),
7525                             addr,
7526                             is_signed);
7527   }
7528 }
7529 
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7530 int Simulator::GetFirstActive(VectorFormat vform,
7531                               const LogicPRegister& pg) const {
7532   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7533     if (pg.IsActive(vform, i)) return i;
7534   }
7535   return -1;
7536 }
7537 
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7538 int Simulator::GetLastActive(VectorFormat vform,
7539                              const LogicPRegister& pg) const {
7540   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7541     if (pg.IsActive(vform, i)) return i;
7542   }
7543   return -1;
7544 }
7545 
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7546 int Simulator::CountActiveLanes(VectorFormat vform,
7547                                 const LogicPRegister& pg) const {
7548   int count = 0;
7549   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7550     count += pg.IsActive(vform, i) ? 1 : 0;
7551   }
7552   return count;
7553 }
7554 
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7555 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7556                                        const LogicPRegister& pg,
7557                                        const LogicPRegister& pn) const {
7558   int count = 0;
7559   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7560     count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7561   }
7562   return count;
7563 }
7564 
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7565 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7566                                                int pattern) const {
7567   VIXL_ASSERT(IsSVEFormat(vform));
7568   int all = LaneCountFromFormat(vform);
7569   VIXL_ASSERT(all > 0);
7570 
7571   switch (pattern) {
7572     case SVE_VL1:
7573     case SVE_VL2:
7574     case SVE_VL3:
7575     case SVE_VL4:
7576     case SVE_VL5:
7577     case SVE_VL6:
7578     case SVE_VL7:
7579     case SVE_VL8:
7580       // VL1-VL8 are encoded directly.
7581       VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7582       VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7583       return (pattern <= all) ? pattern : 0;
7584     case SVE_VL16:
7585     case SVE_VL32:
7586     case SVE_VL64:
7587     case SVE_VL128:
7588     case SVE_VL256: {
7589       // VL16-VL256 are encoded as log2(N) + c.
7590       int min = 16 << (pattern - SVE_VL16);
7591       return (min <= all) ? min : 0;
7592     }
7593     // Special cases.
7594     case SVE_POW2:
7595       return 1 << HighestSetBitPosition(all);
7596     case SVE_MUL4:
7597       return all - (all % 4);
7598     case SVE_MUL3:
7599       return all - (all % 3);
7600     case SVE_ALL:
7601       return all;
7602   }
7603   // Unnamed cases architecturally return 0.
7604   return 0;
7605 }
7606 
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7607 LogicPRegister Simulator::match(VectorFormat vform,
7608                                 LogicPRegister dst,
7609                                 const LogicVRegister& haystack,
7610                                 const LogicVRegister& needles,
7611                                 bool negate_match) {
7612   SimVRegister ztemp;
7613   SimPRegister ptemp;
7614 
7615   pfalse(dst);
7616   int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7617   for (int i = 0; i < lanes_per_segment; i++) {
7618     dup_elements_to_segments(vform, ztemp, needles, i);
7619     SVEIntCompareVectorsHelper(eq,
7620                                vform,
7621                                ptemp,
7622                                GetPTrue(),
7623                                haystack,
7624                                ztemp,
7625                                false,
7626                                LeaveFlags);
7627     SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7628   }
7629   if (negate_match) {
7630     ptrue(vform, ptemp, SVE_ALL);
7631     SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7632   }
7633   return dst;
7634 }
7635 
GetStructAddress(int lane) const7636 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7637   if (IsContiguous()) {
7638     return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7639   }
7640 
7641   VIXL_ASSERT(IsScatterGather());
7642   VIXL_ASSERT(vector_ != NULL);
7643 
7644   // For scatter-gather accesses, we need to extract the offset from vector_,
7645   // and apply modifiers.
7646 
7647   uint64_t offset = 0;
7648   switch (vector_form_) {
7649     case kFormatVnS:
7650       offset = vector_->GetLane<uint32_t>(lane);
7651       break;
7652     case kFormatVnD:
7653       offset = vector_->GetLane<uint64_t>(lane);
7654       break;
7655     default:
7656       VIXL_UNIMPLEMENTED();
7657       break;
7658   }
7659 
7660   switch (vector_mod_) {
7661     case SVE_MUL_VL:
7662       VIXL_UNIMPLEMENTED();
7663       break;
7664     case SVE_LSL:
7665       // We apply the shift below. There's nothing to do here.
7666       break;
7667     case NO_SVE_OFFSET_MODIFIER:
7668       VIXL_ASSERT(vector_shift_ == 0);
7669       break;
7670     case SVE_UXTW:
7671       offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7672       break;
7673     case SVE_SXTW:
7674       offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7675       break;
7676   }
7677 
7678   return base_ + (offset << vector_shift_);
7679 }
7680 
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7681 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7682                                             LogicVRegister dst,
7683                                             const LogicVRegister& src) {
7684   SimVRegister zero;
7685   zero.Clear();
7686   return uzp2(vform, dst, src, zero);
7687 }
7688 
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7689 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7690                                              LogicVRegister dst,
7691                                              const LogicVRegister& src) {
7692   SimVRegister zero;
7693   zero.Clear();
7694   return uzp1(vform, dst, src, zero);
7695 }
7696 
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7697 LogicVRegister Simulator::adcl(VectorFormat vform,
7698                                LogicVRegister dst,
7699                                const LogicVRegister& src1,
7700                                const LogicVRegister& src2,
7701                                bool top) {
7702   unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7703   VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7704 
7705   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7706     uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7707     uint64_t right = dst.Uint(vform, i);
7708     unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7709     std::pair<uint64_t, uint8_t> val_and_flags =
7710         AddWithCarry(reg_size, left, right, carry_in);
7711 
7712     // Set even lanes to the result of the addition.
7713     dst.SetUint(vform, i, val_and_flags.first);
7714 
7715     // Set odd lanes to the carry flag from the addition.
7716     uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7717     dst.SetUint(vform, i + 1, carry_out);
7718   }
7719   return dst;
7720 }
7721 
7722 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7723 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7724 //
7725 // Matrices of the form:
7726 //
7727 //  src1 = ( a b c d e f g h )  src2 = ( A B )
7728 //         ( i j k l m n o p )         ( C D )
7729 //                                     ( E F )
7730 //                                     ( G H )
7731 //                                     ( I J )
7732 //                                     ( K L )
7733 //                                     ( M N )
7734 //                                     ( O P )
7735 //
7736 // Are stored in the input vector registers as:
7737 //
7738 //           15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
7739 //  src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7740 //  src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7741 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7742 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7743                                  LogicVRegister srcdst,
7744                                  const LogicVRegister& src1,
7745                                  const LogicVRegister& src2,
7746                                  bool src1_signed,
7747                                  bool src2_signed) {
7748   // Two destination forms are supported: Q register containing four S-sized
7749   // elements (4S) and Z register containing n S-sized elements (VnS).
7750   VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7751   VectorFormat vform_src = kFormatVnB;
7752   int b_per_segment = kQRegSize / kBRegSize;
7753   int s_per_segment = kQRegSize / kSRegSize;
7754   int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7755   int segment_count = LaneCountFromFormat(vform_dst) / 4;
7756   for (int seg = 0; seg < segment_count; seg++) {
7757     for (int i = 0; i < 2; i++) {
7758       for (int j = 0; j < 2; j++) {
7759         int dstidx = (2 * i) + j + (seg * s_per_segment);
7760         int64_t sum = srcdst.Int(vform_dst, dstidx);
7761         for (int k = 0; k < 8; k++) {
7762           int idx1 = (8 * i) + k + (seg * b_per_segment);
7763           int idx2 = (8 * j) + k + (seg * b_per_segment);
7764           int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7765                                    : src1.Uint(vform_src, idx1);
7766           int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7767                                    : src2.Uint(vform_src, idx2);
7768           sum += e1 * e2;
7769         }
7770         result[dstidx] = sum;
7771       }
7772     }
7773   }
7774   srcdst.SetIntArray(vform_dst, result);
7775   return srcdst;
7776 }
7777 
7778 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7779 // result to the matrix in srcdst, and write back to srcdst.
7780 //
7781 // Matrices of the form:
7782 //
7783 //  src1 = ( a b )  src2 = ( A B )
7784 //         ( c d )         ( C D )
7785 //
7786 // Are stored in the input vector registers as:
7787 //
7788 //           3   2   1   0
7789 //  src1 = [ d | c | b | a ]
7790 //  src2 = [ D | B | C | A ]
7791 //
7792 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7793 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7794                                   LogicVRegister srcdst,
7795                                   const LogicVRegister& src1,
7796                                   const LogicVRegister& src2) {
7797   T result[kZRegMaxSizeInBytes / sizeof(T)];
7798   int T_per_segment = 4;
7799   int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7800   for (int seg = 0; seg < segment_count; seg++) {
7801     int segoff = seg * T_per_segment;
7802     for (int i = 0; i < 2; i++) {
7803       for (int j = 0; j < 2; j++) {
7804         T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7805                             src2.Float<T>(2 * j + 0 + segoff));
7806         T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7807                             src2.Float<T>(2 * j + 1 + segoff));
7808         T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7809         result[2 * i + j + segoff] = FPAdd(sum, prod1);
7810       }
7811     }
7812   }
7813   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7814     // Elements outside a multiple of 4T are set to zero. This happens only
7815     // for double precision operations, when the VL is a multiple of 128 bits,
7816     // but not a mutiple of 256 bits.
7817     T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7818     srcdst.SetFloat<T>(vform, i, value);
7819   }
7820   return srcdst;
7821 }
7822 
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7823 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7824                                   LogicVRegister dst,
7825                                   const LogicVRegister& src1,
7826                                   const LogicVRegister& src2) {
7827   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7828     fmatmul<float>(vform, dst, src1, src2);
7829   } else {
7830     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7831     fmatmul<double>(vform, dst, src1, src2);
7832   }
7833   return dst;
7834 }
7835 
7836 }  // namespace aarch64
7837 }  // namespace vixl
7838 
7839 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
7840