• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <cmath>
30 
31 #include "simulator-aarch64.h"
32 
33 namespace vixl {
34 namespace aarch64 {
35 
36 using vixl::internal::SimFloat16;
37 
// Type query: reports whether the template argument `T` is `double`.
//
// The primary template answers `false` for every type; the explicit
// specialisation below overrides that answer for `double` only.
template <typename T>
bool IsFloat64() {
  return false;
}

// Specialisation for `double`: the one type for which the query is true.
template <>
bool IsFloat64<double>() {
  return true;
}
46 
// Type query: reports whether the template argument `T` is `float`.
//
// The primary template answers `false` for every type; the explicit
// specialisation below overrides that answer for `float` only.
template <typename T>
bool IsFloat32() {
  return false;
}

// Specialisation for `float`: the one type for which the query is true.
template <>
bool IsFloat32<float>() {
  return true;
}
55 
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58   return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62   return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66   return true;
67 }
68 
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71   return kFP64DefaultNaN;
72 }
73 
74 
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77   return kFP32DefaultNaN;
78 }
79 
80 
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83   return SimFloat16(kFP16DefaultNaN);
84 }
85 
86 
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88   if (src >= 0) {
89     return UFixedToDouble(src, fbits, round);
90   } else if (src == INT64_MIN) {
91     return -UFixedToDouble(src, fbits, round);
92   } else {
93     return -UFixedToDouble(-src, fbits, round);
94   }
95 }
96 
97 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99   // An input of 0 is a special case because the result is effectively
100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101   if (src == 0) {
102     return 0.0;
103   }
104 
105   // Calculate the exponent. The highest significant bit will have the value
106   // 2^exponent.
107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
108   const int64_t exponent = highest_significant_bit - fbits;
109 
110   return FPRoundToDouble(0, exponent, src, round);
111 }
112 
113 
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115   if (src >= 0) {
116     return UFixedToFloat(src, fbits, round);
117   } else if (src == INT64_MIN) {
118     return -UFixedToFloat(src, fbits, round);
119   } else {
120     return -UFixedToFloat(-src, fbits, round);
121   }
122 }
123 
124 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126   // An input of 0 is a special case because the result is effectively
127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128   if (src == 0) {
129     return 0.0f;
130   }
131 
132   // Calculate the exponent. The highest significant bit will have the value
133   // 2^exponent.
134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
135   const int32_t exponent = highest_significant_bit - fbits;
136 
137   return FPRoundToFloat(0, exponent, src, round);
138 }
139 
140 
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142   if (src >= 0) {
143     return UFixedToFloat16(src, fbits, round);
144   } else if (src == INT64_MIN) {
145     return -UFixedToFloat16(src, fbits, round);
146   } else {
147     return -UFixedToFloat16(-src, fbits, round);
148   }
149 }
150 
151 
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153                                       int fbits,
154                                       FPRounding round) {
155   // An input of 0 is a special case because the result is effectively
156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157   if (src == 0) {
158     return 0.0f;
159   }
160 
161   // Calculate the exponent. The highest significant bit will have the value
162   // 2^exponent.
163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
164   const int16_t exponent = highest_significant_bit - fbits;
165 
166   return FPRoundToFloat16(0, exponent, src, round);
167 }
168 
169 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171   dst.ClearForWrite(vform);
172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173     LoadLane(dst, vform, i, addr);
174     addr += LaneSizeInBytesFromFormat(vform);
175   }
176 }
177 
178 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180                     LogicVRegister dst,
181                     int index,
182                     uint64_t addr) {
183   LoadLane(dst, vform, index, addr);
184 }
185 
186 
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188                      VectorFormat unpack_vform,
189                      LogicVRegister dst,
190                      uint64_t addr,
191                      bool is_signed) {
192   unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193   dst.ClearForWrite(vform);
194   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195     if (is_signed) {
196       LoadIntToLane(dst, vform, unpack_size, i, addr);
197     } else {
198       LoadUintToLane(dst, vform, unpack_size, i, addr);
199     }
200   }
201 }
202 
203 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205   ld1r(vform, vform, dst, addr);
206 }
207 
208 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210                     LogicVRegister dst1,
211                     LogicVRegister dst2,
212                     uint64_t addr1) {
213   dst1.ClearForWrite(vform);
214   dst2.ClearForWrite(vform);
215   int esize = LaneSizeInBytesFromFormat(vform);
216   uint64_t addr2 = addr1 + esize;
217   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218     LoadLane(dst1, vform, i, addr1);
219     LoadLane(dst2, vform, i, addr2);
220     addr1 += 2 * esize;
221     addr2 += 2 * esize;
222   }
223 }
224 
225 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227                     LogicVRegister dst1,
228                     LogicVRegister dst2,
229                     int index,
230                     uint64_t addr1) {
231   dst1.ClearForWrite(vform);
232   dst2.ClearForWrite(vform);
233   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234   LoadLane(dst1, vform, index, addr1);
235   LoadLane(dst2, vform, index, addr2);
236 }
237 
238 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240                      LogicVRegister dst1,
241                      LogicVRegister dst2,
242                      uint64_t addr) {
243   dst1.ClearForWrite(vform);
244   dst2.ClearForWrite(vform);
245   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247     LoadLane(dst1, vform, i, addr);
248     LoadLane(dst2, vform, i, addr2);
249   }
250 }
251 
252 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254                     LogicVRegister dst1,
255                     LogicVRegister dst2,
256                     LogicVRegister dst3,
257                     uint64_t addr1) {
258   dst1.ClearForWrite(vform);
259   dst2.ClearForWrite(vform);
260   dst3.ClearForWrite(vform);
261   int esize = LaneSizeInBytesFromFormat(vform);
262   uint64_t addr2 = addr1 + esize;
263   uint64_t addr3 = addr2 + esize;
264   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265     LoadLane(dst1, vform, i, addr1);
266     LoadLane(dst2, vform, i, addr2);
267     LoadLane(dst3, vform, i, addr3);
268     addr1 += 3 * esize;
269     addr2 += 3 * esize;
270     addr3 += 3 * esize;
271   }
272 }
273 
274 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276                     LogicVRegister dst1,
277                     LogicVRegister dst2,
278                     LogicVRegister dst3,
279                     int index,
280                     uint64_t addr1) {
281   dst1.ClearForWrite(vform);
282   dst2.ClearForWrite(vform);
283   dst3.ClearForWrite(vform);
284   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286   LoadLane(dst1, vform, index, addr1);
287   LoadLane(dst2, vform, index, addr2);
288   LoadLane(dst3, vform, index, addr3);
289 }
290 
291 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293                      LogicVRegister dst1,
294                      LogicVRegister dst2,
295                      LogicVRegister dst3,
296                      uint64_t addr) {
297   dst1.ClearForWrite(vform);
298   dst2.ClearForWrite(vform);
299   dst3.ClearForWrite(vform);
300   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303     LoadLane(dst1, vform, i, addr);
304     LoadLane(dst2, vform, i, addr2);
305     LoadLane(dst3, vform, i, addr3);
306   }
307 }
308 
309 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311                     LogicVRegister dst1,
312                     LogicVRegister dst2,
313                     LogicVRegister dst3,
314                     LogicVRegister dst4,
315                     uint64_t addr1) {
316   dst1.ClearForWrite(vform);
317   dst2.ClearForWrite(vform);
318   dst3.ClearForWrite(vform);
319   dst4.ClearForWrite(vform);
320   int esize = LaneSizeInBytesFromFormat(vform);
321   uint64_t addr2 = addr1 + esize;
322   uint64_t addr3 = addr2 + esize;
323   uint64_t addr4 = addr3 + esize;
324   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325     LoadLane(dst1, vform, i, addr1);
326     LoadLane(dst2, vform, i, addr2);
327     LoadLane(dst3, vform, i, addr3);
328     LoadLane(dst4, vform, i, addr4);
329     addr1 += 4 * esize;
330     addr2 += 4 * esize;
331     addr3 += 4 * esize;
332     addr4 += 4 * esize;
333   }
334 }
335 
336 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338                     LogicVRegister dst1,
339                     LogicVRegister dst2,
340                     LogicVRegister dst3,
341                     LogicVRegister dst4,
342                     int index,
343                     uint64_t addr1) {
344   dst1.ClearForWrite(vform);
345   dst2.ClearForWrite(vform);
346   dst3.ClearForWrite(vform);
347   dst4.ClearForWrite(vform);
348   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351   LoadLane(dst1, vform, index, addr1);
352   LoadLane(dst2, vform, index, addr2);
353   LoadLane(dst3, vform, index, addr3);
354   LoadLane(dst4, vform, index, addr4);
355 }
356 
357 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359                      LogicVRegister dst1,
360                      LogicVRegister dst2,
361                      LogicVRegister dst3,
362                      LogicVRegister dst4,
363                      uint64_t addr) {
364   dst1.ClearForWrite(vform);
365   dst2.ClearForWrite(vform);
366   dst3.ClearForWrite(vform);
367   dst4.ClearForWrite(vform);
368   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372     LoadLane(dst1, vform, i, addr);
373     LoadLane(dst2, vform, i, addr2);
374     LoadLane(dst3, vform, i, addr3);
375     LoadLane(dst4, vform, i, addr4);
376   }
377 }
378 
379 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382     StoreLane(src, vform, i, addr);
383     addr += LaneSizeInBytesFromFormat(vform);
384   }
385 }
386 
387 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389                     LogicVRegister src,
390                     int index,
391                     uint64_t addr) {
392   StoreLane(src, vform, index, addr);
393 }
394 
395 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397                     LogicVRegister src,
398                     LogicVRegister src2,
399                     uint64_t addr) {
400   int esize = LaneSizeInBytesFromFormat(vform);
401   uint64_t addr2 = addr + esize;
402   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403     StoreLane(src, vform, i, addr);
404     StoreLane(src2, vform, i, addr2);
405     addr += 2 * esize;
406     addr2 += 2 * esize;
407   }
408 }
409 
410 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412                     LogicVRegister src,
413                     LogicVRegister src2,
414                     int index,
415                     uint64_t addr) {
416   int esize = LaneSizeInBytesFromFormat(vform);
417   StoreLane(src, vform, index, addr);
418   StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420 
421 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423                     LogicVRegister src,
424                     LogicVRegister src2,
425                     LogicVRegister src3,
426                     uint64_t addr) {
427   int esize = LaneSizeInBytesFromFormat(vform);
428   uint64_t addr2 = addr + esize;
429   uint64_t addr3 = addr2 + esize;
430   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431     StoreLane(src, vform, i, addr);
432     StoreLane(src2, vform, i, addr2);
433     StoreLane(src3, vform, i, addr3);
434     addr += 3 * esize;
435     addr2 += 3 * esize;
436     addr3 += 3 * esize;
437   }
438 }
439 
440 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442                     LogicVRegister src,
443                     LogicVRegister src2,
444                     LogicVRegister src3,
445                     int index,
446                     uint64_t addr) {
447   int esize = LaneSizeInBytesFromFormat(vform);
448   StoreLane(src, vform, index, addr);
449   StoreLane(src2, vform, index, addr + 1 * esize);
450   StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452 
453 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455                     LogicVRegister src,
456                     LogicVRegister src2,
457                     LogicVRegister src3,
458                     LogicVRegister src4,
459                     uint64_t addr) {
460   int esize = LaneSizeInBytesFromFormat(vform);
461   uint64_t addr2 = addr + esize;
462   uint64_t addr3 = addr2 + esize;
463   uint64_t addr4 = addr3 + esize;
464   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465     StoreLane(src, vform, i, addr);
466     StoreLane(src2, vform, i, addr2);
467     StoreLane(src3, vform, i, addr3);
468     StoreLane(src4, vform, i, addr4);
469     addr += 4 * esize;
470     addr2 += 4 * esize;
471     addr3 += 4 * esize;
472     addr4 += 4 * esize;
473   }
474 }
475 
476 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478                     LogicVRegister src,
479                     LogicVRegister src2,
480                     LogicVRegister src3,
481                     LogicVRegister src4,
482                     int index,
483                     uint64_t addr) {
484   int esize = LaneSizeInBytesFromFormat(vform);
485   StoreLane(src, vform, index, addr);
486   StoreLane(src2, vform, index, addr + 1 * esize);
487   StoreLane(src3, vform, index, addr + 2 * esize);
488   StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490 
491 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493                               LogicVRegister dst,
494                               const LogicVRegister& src1,
495                               const LogicVRegister& src2,
496                               Condition cond) {
497   dst.ClearForWrite(vform);
498   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499     int64_t sa = src1.Int(vform, i);
500     int64_t sb = src2.Int(vform, i);
501     uint64_t ua = src1.Uint(vform, i);
502     uint64_t ub = src2.Uint(vform, i);
503     bool result = false;
504     switch (cond) {
505       case eq:
506         result = (ua == ub);
507         break;
508       case ge:
509         result = (sa >= sb);
510         break;
511       case gt:
512         result = (sa > sb);
513         break;
514       case hi:
515         result = (ua > ub);
516         break;
517       case hs:
518         result = (ua >= ub);
519         break;
520       case lt:
521         result = (sa < sb);
522         break;
523       case le:
524         result = (sa <= sb);
525         break;
526       default:
527         VIXL_UNREACHABLE();
528         break;
529     }
530     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531   }
532   return dst;
533 }
534 
535 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537                               LogicVRegister dst,
538                               const LogicVRegister& src1,
539                               int imm,
540                               Condition cond) {
541   SimVRegister temp;
542   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543   return cmp(vform, dst, src1, imm_reg, cond);
544 }
545 
546 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548                                  LogicVRegister dst,
549                                  const LogicVRegister& src1,
550                                  const LogicVRegister& src2) {
551   dst.ClearForWrite(vform);
552   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553     uint64_t ua = src1.Uint(vform, i);
554     uint64_t ub = src2.Uint(vform, i);
555     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556   }
557   return dst;
558 }
559 
560 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562                               LogicVRegister dst,
563                               const LogicVRegister& src1,
564                               const LogicVRegister& src2) {
565   int lane_size = LaneSizeInBitsFromFormat(vform);
566   dst.ClearForWrite(vform);
567 
568   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569     // Test for unsigned saturation.
570     uint64_t ua = src1.UintLeftJustified(vform, i);
571     uint64_t ub = src2.UintLeftJustified(vform, i);
572     uint64_t ur = ua + ub;
573     if (ur < ua) {
574       dst.SetUnsignedSat(i, true);
575     }
576 
577     // Test for signed saturation.
578     bool pos_a = (ua >> 63) == 0;
579     bool pos_b = (ub >> 63) == 0;
580     bool pos_r = (ur >> 63) == 0;
581     // If the signs of the operands are the same, but different from the result,
582     // there was an overflow.
583     if ((pos_a == pos_b) && (pos_a != pos_r)) {
584       dst.SetSignedSat(i, pos_a);
585     }
586     dst.SetInt(vform, i, ur >> (64 - lane_size));
587   }
588   return dst;
589 }
590 
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592                                    LogicVRegister dst,
593                                    const LogicVRegister& src1,
594                                    uint64_t value) {
595   int lane_size = LaneSizeInBitsFromFormat(vform);
596   VIXL_ASSERT(IsUintN(lane_size, value));
597   dst.ClearForWrite(vform);
598   // Left-justify `value`.
599   uint64_t ub = value << (64 - lane_size);
600   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601     // Test for unsigned saturation.
602     uint64_t ua = src1.UintLeftJustified(vform, i);
603     uint64_t ur = ua + ub;
604     if (ur < ua) {
605       dst.SetUnsignedSat(i, true);
606     }
607 
608     // Test for signed saturation.
609     // `value` is always positive, so we have an overflow if the (signed) result
610     // is smaller than the first operand.
611     if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612       dst.SetSignedSat(i, true);
613     }
614 
615     dst.SetInt(vform, i, ur >> (64 - lane_size));
616   }
617   return dst;
618 }
619 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621                                LogicVRegister dst,
622                                const LogicVRegister& src1,
623                                const LogicVRegister& src2) {
624   SimVRegister temp1, temp2;
625   uzp1(vform, temp1, src1, src2);
626   uzp2(vform, temp2, src1, src2);
627   add(vform, dst, temp1, temp2);
628   if (IsSVEFormat(vform)) {
629     interleave_top_bottom(vform, dst, dst);
630   }
631   return dst;
632 }
633 
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)634 LogicVRegister Simulator::sdiv(VectorFormat vform,
635                                LogicVRegister dst,
636                                const LogicVRegister& src1,
637                                const LogicVRegister& src2) {
638   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
639 
640   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
641     int64_t val1 = src1.Int(vform, i);
642     int64_t val2 = src2.Int(vform, i);
643     int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
644     int64_t quotient = 0;
645     if ((val1 == min_int) && (val2 == -1)) {
646       quotient = min_int;
647     } else if (val2 != 0) {
648       quotient = val1 / val2;
649     }
650     dst.SetInt(vform, i, quotient);
651   }
652 
653   return dst;
654 }
655 
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)656 LogicVRegister Simulator::udiv(VectorFormat vform,
657                                LogicVRegister dst,
658                                const LogicVRegister& src1,
659                                const LogicVRegister& src2) {
660   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
661 
662   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
663     uint64_t val1 = src1.Uint(vform, i);
664     uint64_t val2 = src2.Uint(vform, i);
665     uint64_t quotient = 0;
666     if (val2 != 0) {
667       quotient = val1 / val2;
668     }
669     dst.SetUint(vform, i, quotient);
670   }
671 
672   return dst;
673 }
674 
675 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::mla(VectorFormat vform,
677                               LogicVRegister dst,
678                               const LogicVRegister& srca,
679                               const LogicVRegister& src1,
680                               const LogicVRegister& src2) {
681   SimVRegister temp;
682   mul(vform, temp, src1, src2);
683   add(vform, dst, srca, temp);
684   return dst;
685 }
686 
687 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)688 LogicVRegister Simulator::mls(VectorFormat vform,
689                               LogicVRegister dst,
690                               const LogicVRegister& srca,
691                               const LogicVRegister& src1,
692                               const LogicVRegister& src2) {
693   SimVRegister temp;
694   mul(vform, temp, src1, src2);
695   sub(vform, dst, srca, temp);
696   return dst;
697 }
698 
699 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)700 LogicVRegister Simulator::mul(VectorFormat vform,
701                               LogicVRegister dst,
702                               const LogicVRegister& src1,
703                               const LogicVRegister& src2) {
704   dst.ClearForWrite(vform);
705 
706   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708   }
709   return dst;
710 }
711 
712 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)713 LogicVRegister Simulator::mul(VectorFormat vform,
714                               LogicVRegister dst,
715                               const LogicVRegister& src1,
716                               const LogicVRegister& src2,
717                               int index) {
718   SimVRegister temp;
719   VectorFormat indexform = VectorFormatFillQ(vform);
720   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
721 }
722 
723 
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)724 LogicVRegister Simulator::smulh(VectorFormat vform,
725                                 LogicVRegister dst,
726                                 const LogicVRegister& src1,
727                                 const LogicVRegister& src2) {
728   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
729     int64_t dst_val = 0xbadbeef;
730     int64_t val1 = src1.Int(vform, i);
731     int64_t val2 = src2.Int(vform, i);
732     switch (LaneSizeInBitsFromFormat(vform)) {
733       case 8:
734         dst_val = internal::MultiplyHigh<8>(val1, val2);
735         break;
736       case 16:
737         dst_val = internal::MultiplyHigh<16>(val1, val2);
738         break;
739       case 32:
740         dst_val = internal::MultiplyHigh<32>(val1, val2);
741         break;
742       case 64:
743         dst_val = internal::MultiplyHigh<64>(val1, val2);
744         break;
745       default:
746         VIXL_UNREACHABLE();
747         break;
748     }
749     dst.SetInt(vform, i, dst_val);
750   }
751   return dst;
752 }
753 
754 
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)755 LogicVRegister Simulator::umulh(VectorFormat vform,
756                                 LogicVRegister dst,
757                                 const LogicVRegister& src1,
758                                 const LogicVRegister& src2) {
759   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760     uint64_t dst_val = 0xbadbeef;
761     uint64_t val1 = src1.Uint(vform, i);
762     uint64_t val2 = src2.Uint(vform, i);
763     switch (LaneSizeInBitsFromFormat(vform)) {
764       case 8:
765         dst_val = internal::MultiplyHigh<8>(val1, val2);
766         break;
767       case 16:
768         dst_val = internal::MultiplyHigh<16>(val1, val2);
769         break;
770       case 32:
771         dst_val = internal::MultiplyHigh<32>(val1, val2);
772         break;
773       case 64:
774         dst_val = internal::MultiplyHigh<64>(val1, val2);
775         break;
776       default:
777         VIXL_UNREACHABLE();
778         break;
779     }
780     dst.SetUint(vform, i, dst_val);
781   }
782   return dst;
783 }
784 
785 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)786 LogicVRegister Simulator::mla(VectorFormat vform,
787                               LogicVRegister dst,
788                               const LogicVRegister& src1,
789                               const LogicVRegister& src2,
790                               int index) {
791   SimVRegister temp;
792   VectorFormat indexform = VectorFormatFillQ(vform);
793   return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795 
796 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)797 LogicVRegister Simulator::mls(VectorFormat vform,
798                               LogicVRegister dst,
799                               const LogicVRegister& src1,
800                               const LogicVRegister& src2,
801                               int index) {
802   SimVRegister temp;
803   VectorFormat indexform = VectorFormatFillQ(vform);
804   return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::sqdmull(VectorFormat vform,
808                                   LogicVRegister dst,
809                                   const LogicVRegister& src1,
810                                   const LogicVRegister& src2,
811                                   int index) {
812   SimVRegister temp;
813   VectorFormat indexform =
814       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)818 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
819                                   LogicVRegister dst,
820                                   const LogicVRegister& src1,
821                                   const LogicVRegister& src2,
822                                   int index) {
823   SimVRegister temp;
824   VectorFormat indexform =
825       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
826   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
827 }
828 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)829 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
830                                   LogicVRegister dst,
831                                   const LogicVRegister& src1,
832                                   const LogicVRegister& src2,
833                                   int index) {
834   SimVRegister temp;
835   VectorFormat indexform =
836       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
837   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
838 }
839 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)840 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
841                                   LogicVRegister dst,
842                                   const LogicVRegister& src1,
843                                   const LogicVRegister& src2,
844                                   int index) {
845   SimVRegister temp;
846   VectorFormat indexform = VectorFormatFillQ(vform);
847   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849 
850 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)851 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
852                                    LogicVRegister dst,
853                                    const LogicVRegister& src1,
854                                    const LogicVRegister& src2,
855                                    int index) {
856   SimVRegister temp;
857   VectorFormat indexform = VectorFormatFillQ(vform);
858   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860 
861 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
863                                    LogicVRegister dst,
864                                    const LogicVRegister& src1,
865                                    const LogicVRegister& src2,
866                                    int index) {
867   SimVRegister temp;
868   VectorFormat indexform = VectorFormatFillQ(vform);
869   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
870 }
871 
872 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
874                                    LogicVRegister dst,
875                                    const LogicVRegister& src1,
876                                    const LogicVRegister& src2,
877                                    int index) {
878   SimVRegister temp;
879   VectorFormat indexform = VectorFormatFillQ(vform);
880   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881 }
882 
883 
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const884 uint64_t Simulator::PolynomialMult(uint64_t op1,
885                                    uint64_t op2,
886                                    int lane_size_in_bits) const {
887   VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
888   VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
889   VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
890   uint64_t result = 0;
891   for (int i = 0; i < lane_size_in_bits; ++i) {
892     if ((op1 >> i) & 1) {
893       result = result ^ (op2 << i);
894     }
895   }
896   return result;
897 }
898 
899 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)900 LogicVRegister Simulator::pmul(VectorFormat vform,
901                                LogicVRegister dst,
902                                const LogicVRegister& src1,
903                                const LogicVRegister& src2) {
904   dst.ClearForWrite(vform);
905   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
906     dst.SetUint(vform,
907                 i,
908                 PolynomialMult(src1.Uint(vform, i),
909                                src2.Uint(vform, i),
910                                LaneSizeInBitsFromFormat(vform)));
911   }
912   return dst;
913 }
914 
915 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)916 LogicVRegister Simulator::pmull(VectorFormat vform,
917                                 LogicVRegister dst,
918                                 const LogicVRegister& src1,
919                                 const LogicVRegister& src2) {
920   dst.ClearForWrite(vform);
921 
922   VectorFormat vform_src = VectorFormatHalfWidth(vform);
923   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
924     dst.SetUint(vform,
925                 i,
926                 PolynomialMult(src1.Uint(vform_src, i),
927                                src2.Uint(vform_src, i),
928                                LaneSizeInBitsFromFormat(vform_src)));
929   }
930 
931   return dst;
932 }
933 
934 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull2(VectorFormat vform,
936                                  LogicVRegister dst,
937                                  const LogicVRegister& src1,
938                                  const LogicVRegister& src2) {
939   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
940   dst.ClearForWrite(vform);
941   int lane_count = LaneCountFromFormat(vform);
942   for (int i = 0; i < lane_count; i++) {
943     dst.SetUint(vform,
944                 i,
945                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
946                                src2.Uint(vform_src, lane_count + i),
947                                LaneSizeInBitsFromFormat(vform_src)));
948   }
949   return dst;
950 }
951 
952 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)953 LogicVRegister Simulator::sub(VectorFormat vform,
954                               LogicVRegister dst,
955                               const LogicVRegister& src1,
956                               const LogicVRegister& src2) {
957   int lane_size = LaneSizeInBitsFromFormat(vform);
958   dst.ClearForWrite(vform);
959   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
960     // Test for unsigned saturation.
961     uint64_t ua = src1.UintLeftJustified(vform, i);
962     uint64_t ub = src2.UintLeftJustified(vform, i);
963     uint64_t ur = ua - ub;
964     if (ub > ua) {
965       dst.SetUnsignedSat(i, false);
966     }
967 
968     // Test for signed saturation.
969     bool pos_a = (ua >> 63) == 0;
970     bool pos_b = (ub >> 63) == 0;
971     bool pos_r = (ur >> 63) == 0;
972     // If the signs of the operands are different, and the sign of the first
973     // operand doesn't match the result, there was an overflow.
974     if ((pos_a != pos_b) && (pos_a != pos_r)) {
975       dst.SetSignedSat(i, pos_a);
976     }
977 
978     dst.SetInt(vform, i, ur >> (64 - lane_size));
979   }
980   return dst;
981 }
982 
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)983 LogicVRegister Simulator::sub_uint(VectorFormat vform,
984                                    LogicVRegister dst,
985                                    const LogicVRegister& src1,
986                                    uint64_t value) {
987   int lane_size = LaneSizeInBitsFromFormat(vform);
988   VIXL_ASSERT(IsUintN(lane_size, value));
989   dst.ClearForWrite(vform);
990   // Left-justify `value`.
991   uint64_t ub = value << (64 - lane_size);
992   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
993     // Test for unsigned saturation.
994     uint64_t ua = src1.UintLeftJustified(vform, i);
995     uint64_t ur = ua - ub;
996     if (ub > ua) {
997       dst.SetUnsignedSat(i, false);
998     }
999 
1000     // Test for signed saturation.
1001     // `value` is always positive, so we have an overflow if the (signed) result
1002     // is greater than the first operand.
1003     if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1004       dst.SetSignedSat(i, false);
1005     }
1006 
1007     dst.SetInt(vform, i, ur >> (64 - lane_size));
1008   }
1009   return dst;
1010 }
1011 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012 LogicVRegister Simulator::and_(VectorFormat vform,
1013                                LogicVRegister dst,
1014                                const LogicVRegister& src1,
1015                                const LogicVRegister& src2) {
1016   dst.ClearForWrite(vform);
1017   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1018     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1019   }
1020   return dst;
1021 }
1022 
1023 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1024 LogicVRegister Simulator::orr(VectorFormat vform,
1025                               LogicVRegister dst,
1026                               const LogicVRegister& src1,
1027                               const LogicVRegister& src2) {
1028   dst.ClearForWrite(vform);
1029   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1030     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1031   }
1032   return dst;
1033 }
1034 
1035 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1036 LogicVRegister Simulator::orn(VectorFormat vform,
1037                               LogicVRegister dst,
1038                               const LogicVRegister& src1,
1039                               const LogicVRegister& src2) {
1040   dst.ClearForWrite(vform);
1041   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1042     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1043   }
1044   return dst;
1045 }
1046 
1047 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1048 LogicVRegister Simulator::eor(VectorFormat vform,
1049                               LogicVRegister dst,
1050                               const LogicVRegister& src1,
1051                               const LogicVRegister& src2) {
1052   dst.ClearForWrite(vform);
1053   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1054     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1055   }
1056   return dst;
1057 }
1058 
1059 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1060 LogicVRegister Simulator::bic(VectorFormat vform,
1061                               LogicVRegister dst,
1062                               const LogicVRegister& src1,
1063                               const LogicVRegister& src2) {
1064   dst.ClearForWrite(vform);
1065   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1066     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1067   }
1068   return dst;
1069 }
1070 
1071 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1072 LogicVRegister Simulator::bic(VectorFormat vform,
1073                               LogicVRegister dst,
1074                               const LogicVRegister& src,
1075                               uint64_t imm) {
1076   uint64_t result[16];
1077   int lane_count = LaneCountFromFormat(vform);
1078   for (int i = 0; i < lane_count; ++i) {
1079     result[i] = src.Uint(vform, i) & ~imm;
1080   }
1081   dst.ClearForWrite(vform);
1082   for (int i = 0; i < lane_count; ++i) {
1083     dst.SetUint(vform, i, result[i]);
1084   }
1085   return dst;
1086 }
1087 
1088 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1089 LogicVRegister Simulator::bif(VectorFormat vform,
1090                               LogicVRegister dst,
1091                               const LogicVRegister& src1,
1092                               const LogicVRegister& src2) {
1093   dst.ClearForWrite(vform);
1094   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1095     uint64_t operand1 = dst.Uint(vform, i);
1096     uint64_t operand2 = ~src2.Uint(vform, i);
1097     uint64_t operand3 = src1.Uint(vform, i);
1098     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1099     dst.SetUint(vform, i, result);
1100   }
1101   return dst;
1102 }
1103 
1104 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1105 LogicVRegister Simulator::bit(VectorFormat vform,
1106                               LogicVRegister dst,
1107                               const LogicVRegister& src1,
1108                               const LogicVRegister& src2) {
1109   dst.ClearForWrite(vform);
1110   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1111     uint64_t operand1 = dst.Uint(vform, i);
1112     uint64_t operand2 = src2.Uint(vform, i);
1113     uint64_t operand3 = src1.Uint(vform, i);
1114     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1115     dst.SetUint(vform, i, result);
1116   }
1117   return dst;
1118 }
1119 
1120 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1121 LogicVRegister Simulator::bsl(VectorFormat vform,
1122                               LogicVRegister dst,
1123                               const LogicVRegister& src_mask,
1124                               const LogicVRegister& src1,
1125                               const LogicVRegister& src2) {
1126   dst.ClearForWrite(vform);
1127   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128     uint64_t operand1 = src2.Uint(vform, i);
1129     uint64_t operand2 = src_mask.Uint(vform, i);
1130     uint64_t operand3 = src1.Uint(vform, i);
1131     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132     dst.SetUint(vform, i, result);
1133   }
1134   return dst;
1135 }
1136 
1137 
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1138 LogicVRegister Simulator::sminmax(VectorFormat vform,
1139                                   LogicVRegister dst,
1140                                   const LogicVRegister& src1,
1141                                   const LogicVRegister& src2,
1142                                   bool max) {
1143   dst.ClearForWrite(vform);
1144   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145     int64_t src1_val = src1.Int(vform, i);
1146     int64_t src2_val = src2.Int(vform, i);
1147     int64_t dst_val;
1148     if (max) {
1149       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1150     } else {
1151       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1152     }
1153     dst.SetInt(vform, i, dst_val);
1154   }
1155   return dst;
1156 }
1157 
1158 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::smax(VectorFormat vform,
1160                                LogicVRegister dst,
1161                                const LogicVRegister& src1,
1162                                const LogicVRegister& src2) {
1163   return sminmax(vform, dst, src1, src2, true);
1164 }
1165 
1166 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1167 LogicVRegister Simulator::smin(VectorFormat vform,
1168                                LogicVRegister dst,
1169                                const LogicVRegister& src1,
1170                                const LogicVRegister& src2) {
1171   return sminmax(vform, dst, src1, src2, false);
1172 }
1173 
1174 
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1175 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1176                                    LogicVRegister dst,
1177                                    const LogicVRegister& src1,
1178                                    const LogicVRegister& src2,
1179                                    bool max) {
1180   unsigned lanes = LaneCountFromFormat(vform);
1181   int64_t result[kZRegMaxSizeInBytes];
1182   const LogicVRegister* src = &src1;
1183   for (unsigned j = 0; j < 2; j++) {
1184     for (unsigned i = 0; i < lanes; i += 2) {
1185       int64_t first_val = src->Int(vform, i);
1186       int64_t second_val = src->Int(vform, i + 1);
1187       int64_t dst_val;
1188       if (max) {
1189         dst_val = (first_val > second_val) ? first_val : second_val;
1190       } else {
1191         dst_val = (first_val < second_val) ? first_val : second_val;
1192       }
1193       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1194       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1195     }
1196     src = &src2;
1197   }
1198   dst.SetIntArray(vform, result);
1199   if (IsSVEFormat(vform)) {
1200     interleave_top_bottom(vform, dst, dst);
1201   }
1202   return dst;
1203 }
1204 
1205 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206 LogicVRegister Simulator::smaxp(VectorFormat vform,
1207                                 LogicVRegister dst,
1208                                 const LogicVRegister& src1,
1209                                 const LogicVRegister& src2) {
1210   return sminmaxp(vform, dst, src1, src2, true);
1211 }
1212 
1213 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1214 LogicVRegister Simulator::sminp(VectorFormat vform,
1215                                 LogicVRegister dst,
1216                                 const LogicVRegister& src1,
1217                                 const LogicVRegister& src2) {
1218   return sminmaxp(vform, dst, src1, src2, false);
1219 }
1220 
1221 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1222 LogicVRegister Simulator::addp(VectorFormat vform,
1223                                LogicVRegister dst,
1224                                const LogicVRegister& src) {
1225   VIXL_ASSERT(vform == kFormatD);
1226 
1227   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1228   dst.ClearForWrite(vform);
1229   dst.SetUint(vform, 0, dst_val);
1230   return dst;
1231 }
1232 
1233 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1234 LogicVRegister Simulator::addv(VectorFormat vform,
1235                                LogicVRegister dst,
1236                                const LogicVRegister& src) {
1237   VectorFormat vform_dst =
1238       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1239 
1240 
1241   int64_t dst_val = 0;
1242   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1243     dst_val += src.Int(vform, i);
1244   }
1245 
1246   dst.ClearForWrite(vform_dst);
1247   dst.SetInt(vform_dst, 0, dst_val);
1248   return dst;
1249 }
1250 
1251 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1252 LogicVRegister Simulator::saddlv(VectorFormat vform,
1253                                  LogicVRegister dst,
1254                                  const LogicVRegister& src) {
1255   VectorFormat vform_dst =
1256       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1257 
1258   int64_t dst_val = 0;
1259   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260     dst_val += src.Int(vform, i);
1261   }
1262 
1263   dst.ClearForWrite(vform_dst);
1264   dst.SetInt(vform_dst, 0, dst_val);
1265   return dst;
1266 }
1267 
1268 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1270                                  LogicVRegister dst,
1271                                  const LogicVRegister& src) {
1272   VectorFormat vform_dst =
1273       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1274 
1275   uint64_t dst_val = 0;
1276   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1277     dst_val += src.Uint(vform, i);
1278   }
1279 
1280   dst.ClearForWrite(vform_dst);
1281   dst.SetUint(vform_dst, 0, dst_val);
1282   return dst;
1283 }
1284 
1285 
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1286 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1287                                    LogicVRegister dst,
1288                                    const LogicPRegister& pg,
1289                                    const LogicVRegister& src,
1290                                    bool max) {
1291   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1292   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1293     if (!pg.IsActive(vform, i)) continue;
1294 
1295     int64_t src_val = src.Int(vform, i);
1296     if (max) {
1297       dst_val = (src_val > dst_val) ? src_val : dst_val;
1298     } else {
1299       dst_val = (src_val < dst_val) ? src_val : dst_val;
1300     }
1301   }
1302   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1303   dst.SetInt(vform, 0, dst_val);
1304   return dst;
1305 }
1306 
1307 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1308 LogicVRegister Simulator::smaxv(VectorFormat vform,
1309                                 LogicVRegister dst,
1310                                 const LogicVRegister& src) {
1311   sminmaxv(vform, dst, GetPTrue(), src, true);
1312   return dst;
1313 }
1314 
1315 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1316 LogicVRegister Simulator::sminv(VectorFormat vform,
1317                                 LogicVRegister dst,
1318                                 const LogicVRegister& src) {
1319   sminmaxv(vform, dst, GetPTrue(), src, false);
1320   return dst;
1321 }
1322 
1323 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1324 LogicVRegister Simulator::smaxv(VectorFormat vform,
1325                                 LogicVRegister dst,
1326                                 const LogicPRegister& pg,
1327                                 const LogicVRegister& src) {
1328   VIXL_ASSERT(IsSVEFormat(vform));
1329   sminmaxv(vform, dst, pg, src, true);
1330   return dst;
1331 }
1332 
1333 
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1334 LogicVRegister Simulator::sminv(VectorFormat vform,
1335                                 LogicVRegister dst,
1336                                 const LogicPRegister& pg,
1337                                 const LogicVRegister& src) {
1338   VIXL_ASSERT(IsSVEFormat(vform));
1339   sminmaxv(vform, dst, pg, src, false);
1340   return dst;
1341 }
1342 
1343 
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::uminmax(VectorFormat vform,
1345                                   LogicVRegister dst,
1346                                   const LogicVRegister& src1,
1347                                   const LogicVRegister& src2,
1348                                   bool max) {
1349   dst.ClearForWrite(vform);
1350   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351     uint64_t src1_val = src1.Uint(vform, i);
1352     uint64_t src2_val = src2.Uint(vform, i);
1353     uint64_t dst_val;
1354     if (max) {
1355       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356     } else {
1357       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358     }
1359     dst.SetUint(vform, i, dst_val);
1360   }
1361   return dst;
1362 }
1363 
1364 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::umax(VectorFormat vform,
1366                                LogicVRegister dst,
1367                                const LogicVRegister& src1,
1368                                const LogicVRegister& src2) {
1369   return uminmax(vform, dst, src1, src2, true);
1370 }
1371 
1372 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::umin(VectorFormat vform,
1374                                LogicVRegister dst,
1375                                const LogicVRegister& src1,
1376                                const LogicVRegister& src2) {
1377   return uminmax(vform, dst, src1, src2, false);
1378 }
1379 
1380 
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1382                                    LogicVRegister dst,
1383                                    const LogicVRegister& src1,
1384                                    const LogicVRegister& src2,
1385                                    bool max) {
1386   unsigned lanes = LaneCountFromFormat(vform);
1387   uint64_t result[kZRegMaxSizeInBytes];
1388   const LogicVRegister* src = &src1;
1389   for (unsigned j = 0; j < 2; j++) {
1390     for (unsigned i = 0; i < lanes; i += 2) {
1391       uint64_t first_val = src->Uint(vform, i);
1392       uint64_t second_val = src->Uint(vform, i + 1);
1393       uint64_t dst_val;
1394       if (max) {
1395         dst_val = (first_val > second_val) ? first_val : second_val;
1396       } else {
1397         dst_val = (first_val < second_val) ? first_val : second_val;
1398       }
1399       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1400       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401     }
1402     src = &src2;
1403   }
1404   dst.SetUintArray(vform, result);
1405   if (IsSVEFormat(vform)) {
1406     interleave_top_bottom(vform, dst, dst);
1407   }
1408   return dst;
1409 }
1410 
1411 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1412 LogicVRegister Simulator::umaxp(VectorFormat vform,
1413                                 LogicVRegister dst,
1414                                 const LogicVRegister& src1,
1415                                 const LogicVRegister& src2) {
1416   return uminmaxp(vform, dst, src1, src2, true);
1417 }
1418 
1419 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1420 LogicVRegister Simulator::uminp(VectorFormat vform,
1421                                 LogicVRegister dst,
1422                                 const LogicVRegister& src1,
1423                                 const LogicVRegister& src2) {
1424   return uminmaxp(vform, dst, src1, src2, false);
1425 }
1426 
1427 
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1428 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1429                                    LogicVRegister dst,
1430                                    const LogicPRegister& pg,
1431                                    const LogicVRegister& src,
1432                                    bool max) {
1433   uint64_t dst_val = max ? 0 : UINT64_MAX;
1434   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1435     if (!pg.IsActive(vform, i)) continue;
1436 
1437     uint64_t src_val = src.Uint(vform, i);
1438     if (max) {
1439       dst_val = (src_val > dst_val) ? src_val : dst_val;
1440     } else {
1441       dst_val = (src_val < dst_val) ? src_val : dst_val;
1442     }
1443   }
1444   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1445   dst.SetUint(vform, 0, dst_val);
1446   return dst;
1447 }
1448 
1449 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1450 LogicVRegister Simulator::umaxv(VectorFormat vform,
1451                                 LogicVRegister dst,
1452                                 const LogicVRegister& src) {
1453   uminmaxv(vform, dst, GetPTrue(), src, true);
1454   return dst;
1455 }
1456 
1457 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1458 LogicVRegister Simulator::uminv(VectorFormat vform,
1459                                 LogicVRegister dst,
1460                                 const LogicVRegister& src) {
1461   uminmaxv(vform, dst, GetPTrue(), src, false);
1462   return dst;
1463 }
1464 
1465 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1466 LogicVRegister Simulator::umaxv(VectorFormat vform,
1467                                 LogicVRegister dst,
1468                                 const LogicPRegister& pg,
1469                                 const LogicVRegister& src) {
1470   VIXL_ASSERT(IsSVEFormat(vform));
1471   uminmaxv(vform, dst, pg, src, true);
1472   return dst;
1473 }
1474 
1475 
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1476 LogicVRegister Simulator::uminv(VectorFormat vform,
1477                                 LogicVRegister dst,
1478                                 const LogicPRegister& pg,
1479                                 const LogicVRegister& src) {
1480   VIXL_ASSERT(IsSVEFormat(vform));
1481   uminmaxv(vform, dst, pg, src, false);
1482   return dst;
1483 }
1484 
1485 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::shl(VectorFormat vform,
1487                               LogicVRegister dst,
1488                               const LogicVRegister& src,
1489                               int shift) {
1490   VIXL_ASSERT(shift >= 0);
1491   SimVRegister temp;
1492   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1493   return ushl(vform, dst, src, shiftreg);
1494 }
1495 
1496 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1497 LogicVRegister Simulator::sshll(VectorFormat vform,
1498                                 LogicVRegister dst,
1499                                 const LogicVRegister& src,
1500                                 int shift) {
1501   VIXL_ASSERT(shift >= 0);
1502   SimVRegister temp1, temp2;
1503   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1504   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1505   return sshl(vform, dst, extendedreg, shiftreg);
1506 }
1507 
1508 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1509 LogicVRegister Simulator::sshll2(VectorFormat vform,
1510                                  LogicVRegister dst,
1511                                  const LogicVRegister& src,
1512                                  int shift) {
1513   VIXL_ASSERT(shift >= 0);
1514   SimVRegister temp1, temp2;
1515   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1516   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1517   return sshl(vform, dst, extendedreg, shiftreg);
1518 }
1519 
1520 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1521 LogicVRegister Simulator::shll(VectorFormat vform,
1522                                LogicVRegister dst,
1523                                const LogicVRegister& src) {
1524   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1525   return sshll(vform, dst, src, shift);
1526 }
1527 
1528 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1529 LogicVRegister Simulator::shll2(VectorFormat vform,
1530                                 LogicVRegister dst,
1531                                 const LogicVRegister& src) {
1532   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1533   return sshll2(vform, dst, src, shift);
1534 }
1535 
1536 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1537 LogicVRegister Simulator::ushll(VectorFormat vform,
1538                                 LogicVRegister dst,
1539                                 const LogicVRegister& src,
1540                                 int shift) {
1541   VIXL_ASSERT(shift >= 0);
1542   SimVRegister temp1, temp2;
1543   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1544   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1545   return ushl(vform, dst, extendedreg, shiftreg);
1546 }
1547 
1548 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1549 LogicVRegister Simulator::ushll2(VectorFormat vform,
1550                                  LogicVRegister dst,
1551                                  const LogicVRegister& src,
1552                                  int shift) {
1553   VIXL_ASSERT(shift >= 0);
1554   SimVRegister temp1, temp2;
1555   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1556   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1557   return ushl(vform, dst, extendedreg, shiftreg);
1558 }
1559 
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1560 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1561                                            const LogicPRegister& pg,
1562                                            const LogicVRegister& src,
1563                                            int offset_from_last_active) {
1564   // Untested for any other values.
1565   VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1566 
1567   int last_active = GetLastActive(vform, pg);
1568   int lane_count = LaneCountFromFormat(vform);
1569   int index =
1570       ((last_active + offset_from_last_active) + lane_count) % lane_count;
1571   return std::make_pair(last_active >= 0, src.Uint(vform, index));
1572 }
1573 
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1574 LogicVRegister Simulator::compact(VectorFormat vform,
1575                                   LogicVRegister dst,
1576                                   const LogicPRegister& pg,
1577                                   const LogicVRegister& src) {
1578   int j = 0;
1579   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1580     if (pg.IsActive(vform, i)) {
1581       dst.SetUint(vform, j++, src.Uint(vform, i));
1582     }
1583   }
1584   for (; j < LaneCountFromFormat(vform); j++) {
1585     dst.SetUint(vform, j, 0);
1586   }
1587   return dst;
1588 }
1589 
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1590 LogicVRegister Simulator::splice(VectorFormat vform,
1591                                  LogicVRegister dst,
1592                                  const LogicPRegister& pg,
1593                                  const LogicVRegister& src1,
1594                                  const LogicVRegister& src2) {
1595   int lane_count = LaneCountFromFormat(vform);
1596   int first_active = GetFirstActive(vform, pg);
1597   int last_active = GetLastActive(vform, pg);
1598   int dst_idx = 0;
1599   uint64_t result[kZRegMaxSizeInBytes];
1600 
1601   if (first_active >= 0) {
1602     VIXL_ASSERT(last_active >= first_active);
1603     VIXL_ASSERT(last_active < lane_count);
1604     for (int i = first_active; i <= last_active; i++) {
1605       result[dst_idx++] = src1.Uint(vform, i);
1606     }
1607   }
1608 
1609   VIXL_ASSERT(dst_idx <= lane_count);
1610   for (int i = dst_idx; i < lane_count; i++) {
1611     result[i] = src2.Uint(vform, i - dst_idx);
1612   }
1613 
1614   dst.SetUintArray(vform, result);
1615 
1616   return dst;
1617 }
1618 
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1619 LogicVRegister Simulator::sel(VectorFormat vform,
1620                               LogicVRegister dst,
1621                               const SimPRegister& pg,
1622                               const LogicVRegister& src1,
1623                               const LogicVRegister& src2) {
1624   int p_reg_bits_per_lane =
1625       LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1626   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1627     uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1628                               ? src1.Uint(vform, lane)
1629                               : src2.Uint(vform, lane);
1630     dst.SetUint(vform, lane, lane_value);
1631   }
1632   return dst;
1633 }
1634 
1635 
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1636 LogicPRegister Simulator::sel(LogicPRegister dst,
1637                               const LogicPRegister& pg,
1638                               const LogicPRegister& src1,
1639                               const LogicPRegister& src2) {
1640   for (int i = 0; i < dst.GetChunkCount(); i++) {
1641     LogicPRegister::ChunkType mask = pg.GetChunk(i);
1642     LogicPRegister::ChunkType result =
1643         (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1644     dst.SetChunk(i, result);
1645   }
1646   return dst;
1647 }
1648 
1649 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1650 LogicVRegister Simulator::sli(VectorFormat vform,
1651                               LogicVRegister dst,
1652                               const LogicVRegister& src,
1653                               int shift) {
1654   dst.ClearForWrite(vform);
1655   int lane_count = LaneCountFromFormat(vform);
1656   for (int i = 0; i < lane_count; i++) {
1657     uint64_t src_lane = src.Uint(vform, i);
1658     uint64_t dst_lane = dst.Uint(vform, i);
1659     uint64_t shifted = src_lane << shift;
1660     uint64_t mask = MaxUintFromFormat(vform) << shift;
1661     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1662   }
1663   return dst;
1664 }
1665 
1666 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667 LogicVRegister Simulator::sqshl(VectorFormat vform,
1668                                 LogicVRegister dst,
1669                                 const LogicVRegister& src,
1670                                 int shift) {
1671   VIXL_ASSERT(shift >= 0);
1672   SimVRegister temp;
1673   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1674   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1675 }
1676 
1677 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1678 LogicVRegister Simulator::uqshl(VectorFormat vform,
1679                                 LogicVRegister dst,
1680                                 const LogicVRegister& src,
1681                                 int shift) {
1682   VIXL_ASSERT(shift >= 0);
1683   SimVRegister temp;
1684   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1685   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1686 }
1687 
1688 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1690                                  LogicVRegister dst,
1691                                  const LogicVRegister& src,
1692                                  int shift) {
1693   VIXL_ASSERT(shift >= 0);
1694   SimVRegister temp;
1695   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1696   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1697 }
1698 
1699 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1700 LogicVRegister Simulator::sri(VectorFormat vform,
1701                               LogicVRegister dst,
1702                               const LogicVRegister& src,
1703                               int shift) {
1704   dst.ClearForWrite(vform);
1705   int lane_count = LaneCountFromFormat(vform);
1706   VIXL_ASSERT((shift > 0) &&
1707               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1708   for (int i = 0; i < lane_count; i++) {
1709     uint64_t src_lane = src.Uint(vform, i);
1710     uint64_t dst_lane = dst.Uint(vform, i);
1711     uint64_t shifted;
1712     uint64_t mask;
1713     if (shift == 64) {
1714       shifted = 0;
1715       mask = 0;
1716     } else {
1717       shifted = src_lane >> shift;
1718       mask = MaxUintFromFormat(vform) >> shift;
1719     }
1720     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1721   }
1722   return dst;
1723 }
1724 
1725 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726 LogicVRegister Simulator::ushr(VectorFormat vform,
1727                                LogicVRegister dst,
1728                                const LogicVRegister& src,
1729                                int shift) {
1730   VIXL_ASSERT(shift >= 0);
1731   SimVRegister temp;
1732   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1733   return ushl(vform, dst, src, shiftreg);
1734 }
1735 
1736 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::sshr(VectorFormat vform,
1738                                LogicVRegister dst,
1739                                const LogicVRegister& src,
1740                                int shift) {
1741   VIXL_ASSERT(shift >= 0);
1742   SimVRegister temp;
1743   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1744   return sshl(vform, dst, src, shiftreg);
1745 }
1746 
1747 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1748 LogicVRegister Simulator::ssra(VectorFormat vform,
1749                                LogicVRegister dst,
1750                                const LogicVRegister& src,
1751                                int shift) {
1752   SimVRegister temp;
1753   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1754   return add(vform, dst, dst, shifted_reg);
1755 }
1756 
1757 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1758 LogicVRegister Simulator::usra(VectorFormat vform,
1759                                LogicVRegister dst,
1760                                const LogicVRegister& src,
1761                                int shift) {
1762   SimVRegister temp;
1763   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1764   return add(vform, dst, dst, shifted_reg);
1765 }
1766 
1767 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768 LogicVRegister Simulator::srsra(VectorFormat vform,
1769                                 LogicVRegister dst,
1770                                 const LogicVRegister& src,
1771                                 int shift) {
1772   SimVRegister temp;
1773   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1774   return add(vform, dst, dst, shifted_reg);
1775 }
1776 
1777 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ursra(VectorFormat vform,
1779                                 LogicVRegister dst,
1780                                 const LogicVRegister& src,
1781                                 int shift) {
1782   SimVRegister temp;
1783   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1784   return add(vform, dst, dst, shifted_reg);
1785 }
1786 
1787 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1788 LogicVRegister Simulator::cls(VectorFormat vform,
1789                               LogicVRegister dst,
1790                               const LogicVRegister& src) {
1791   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1792   int lane_count = LaneCountFromFormat(vform);
1793 
1794   // Ensure that we can store one result per lane.
1795   int result[kZRegMaxSizeInBytes];
1796 
1797   for (int i = 0; i < lane_count; i++) {
1798     result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1799   }
1800 
1801   dst.ClearForWrite(vform);
1802   for (int i = 0; i < lane_count; ++i) {
1803     dst.SetUint(vform, i, result[i]);
1804   }
1805   return dst;
1806 }
1807 
1808 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::clz(VectorFormat vform,
1810                               LogicVRegister dst,
1811                               const LogicVRegister& src) {
1812   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1813   int lane_count = LaneCountFromFormat(vform);
1814 
1815   // Ensure that we can store one result per lane.
1816   int result[kZRegMaxSizeInBytes];
1817 
1818   for (int i = 0; i < lane_count; i++) {
1819     result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1820   }
1821 
1822   dst.ClearForWrite(vform);
1823   for (int i = 0; i < lane_count; ++i) {
1824     dst.SetUint(vform, i, result[i]);
1825   }
1826   return dst;
1827 }
1828 
1829 
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1830 LogicVRegister Simulator::cnot(VectorFormat vform,
1831                                LogicVRegister dst,
1832                                const LogicVRegister& src) {
1833   dst.ClearForWrite(vform);
1834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835     uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1836     dst.SetUint(vform, i, value);
1837   }
1838   return dst;
1839 }
1840 
1841 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1842 LogicVRegister Simulator::cnt(VectorFormat vform,
1843                               LogicVRegister dst,
1844                               const LogicVRegister& src) {
1845   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1846   int lane_count = LaneCountFromFormat(vform);
1847 
1848   // Ensure that we can store one result per lane.
1849   int result[kZRegMaxSizeInBytes];
1850 
1851   for (int i = 0; i < lane_count; i++) {
1852     result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1853   }
1854 
1855   dst.ClearForWrite(vform);
1856   for (int i = 0; i < lane_count; ++i) {
1857     dst.SetUint(vform, i, result[i]);
1858   }
1859   return dst;
1860 }
1861 
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1862 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1863                                             int esize,
1864                                             bool shift_in_ls_byte) {
1865   if (shift_in_ls_byte) {
1866     // Neon uses the least-significant byte of the lane as the shift distance.
1867     shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1868   } else {
1869     // SVE uses a saturated shift distance in the range
1870     //  -(esize + 1) ... (esize + 1).
1871     if (shift_val > (esize + 1)) shift_val = esize + 1;
1872     if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1873   }
1874   return shift_val;
1875 }
1876 
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1877 LogicVRegister Simulator::sshl(VectorFormat vform,
1878                                LogicVRegister dst,
1879                                const LogicVRegister& src1,
1880                                const LogicVRegister& src2,
1881                                bool shift_in_ls_byte) {
1882   dst.ClearForWrite(vform);
1883   int esize = LaneSizeInBitsFromFormat(vform);
1884   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1885     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1886                                                      esize,
1887                                                      shift_in_ls_byte);
1888 
1889     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1890 
1891     // Set signed saturation state.
1892     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1893       dst.SetSignedSat(i, lj_src_val >= 0);
1894     }
1895 
1896     // Set unsigned saturation state.
1897     if (lj_src_val < 0) {
1898       dst.SetUnsignedSat(i, false);
1899     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1900                (lj_src_val != 0)) {
1901       dst.SetUnsignedSat(i, true);
1902     }
1903 
1904     int64_t src_val = src1.Int(vform, i);
1905     bool src_is_negative = src_val < 0;
1906     if (shift_val > 63) {
1907       dst.SetInt(vform, i, 0);
1908     } else if (shift_val < -63) {
1909       dst.SetRounding(i, src_is_negative);
1910       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1911     } else {
1912       // Use unsigned types for shifts, as behaviour is undefined for signed
1913       // lhs.
1914       uint64_t usrc_val = static_cast<uint64_t>(src_val);
1915 
1916       if (shift_val < 0) {
1917         // Convert to right shift.
1918         shift_val = -shift_val;
1919 
1920         // Set rounding state by testing most-significant bit shifted out.
1921         // Rounding only needed on right shifts.
1922         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1923           dst.SetRounding(i, true);
1924         }
1925 
1926         usrc_val >>= shift_val;
1927 
1928         if (src_is_negative) {
1929           // Simulate sign-extension.
1930           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1931         }
1932       } else {
1933         usrc_val <<= shift_val;
1934       }
1935       dst.SetUint(vform, i, usrc_val);
1936     }
1937   }
1938   return dst;
1939 }
1940 
1941 
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1942 LogicVRegister Simulator::ushl(VectorFormat vform,
1943                                LogicVRegister dst,
1944                                const LogicVRegister& src1,
1945                                const LogicVRegister& src2,
1946                                bool shift_in_ls_byte) {
1947   dst.ClearForWrite(vform);
1948   int esize = LaneSizeInBitsFromFormat(vform);
1949   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1950     int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1951                                                      esize,
1952                                                      shift_in_ls_byte);
1953 
1954     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955 
1956     // Set saturation state.
1957     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958       dst.SetUnsignedSat(i, true);
1959     }
1960 
1961     uint64_t src_val = src1.Uint(vform, i);
1962     if ((shift_val > 63) || (shift_val < -64)) {
1963       dst.SetUint(vform, i, 0);
1964     } else {
1965       if (shift_val < 0) {
1966         // Set rounding state. Rounding only needed on right shifts.
1967         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968           dst.SetRounding(i, true);
1969         }
1970 
1971         if (shift_val == -64) {
1972           src_val = 0;
1973         } else {
1974           src_val >>= -shift_val;
1975         }
1976       } else {
1977         src_val <<= shift_val;
1978       }
1979       dst.SetUint(vform, i, src_val);
1980     }
1981   }
1982   return dst;
1983 }
1984 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1985 LogicVRegister Simulator::sshr(VectorFormat vform,
1986                                LogicVRegister dst,
1987                                const LogicVRegister& src1,
1988                                const LogicVRegister& src2) {
1989   SimVRegister temp;
1990   // Saturate to sidestep the min-int problem.
1991   neg(vform, temp, src2).SignedSaturate(vform);
1992   sshl(vform, dst, src1, temp, false);
1993   return dst;
1994 }
1995 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1996 LogicVRegister Simulator::ushr(VectorFormat vform,
1997                                LogicVRegister dst,
1998                                const LogicVRegister& src1,
1999                                const LogicVRegister& src2) {
2000   SimVRegister temp;
2001   // Saturate to sidestep the min-int problem.
2002   neg(vform, temp, src2).SignedSaturate(vform);
2003   ushl(vform, dst, src1, temp, false);
2004   return dst;
2005 }
2006 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2007 LogicVRegister Simulator::neg(VectorFormat vform,
2008                               LogicVRegister dst,
2009                               const LogicVRegister& src) {
2010   dst.ClearForWrite(vform);
2011   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2012     // Test for signed saturation.
2013     int64_t sa = src.Int(vform, i);
2014     if (sa == MinIntFromFormat(vform)) {
2015       dst.SetSignedSat(i, true);
2016     }
2017     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2018   }
2019   return dst;
2020 }
2021 
2022 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2023 LogicVRegister Simulator::suqadd(VectorFormat vform,
2024                                  LogicVRegister dst,
2025                                  const LogicVRegister& src1,
2026                                  const LogicVRegister& src2) {
2027   dst.ClearForWrite(vform);
2028   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029     int64_t sa = src1.IntLeftJustified(vform, i);
2030     uint64_t ub = src2.UintLeftJustified(vform, i);
2031     uint64_t ur = sa + ub;
2032 
2033     int64_t sr;
2034     memcpy(&sr, &ur, sizeof(sr));
2035     if (sr < sa) {  // Test for signed positive saturation.
2036       dst.SetInt(vform, i, MaxIntFromFormat(vform));
2037     } else {
2038       dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2039     }
2040   }
2041   return dst;
2042 }
2043 
2044 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2045 LogicVRegister Simulator::usqadd(VectorFormat vform,
2046                                  LogicVRegister dst,
2047                                  const LogicVRegister& src1,
2048                                  const LogicVRegister& src2) {
2049   dst.ClearForWrite(vform);
2050   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051     uint64_t ua = src1.UintLeftJustified(vform, i);
2052     int64_t sb = src2.IntLeftJustified(vform, i);
2053     uint64_t ur = ua + sb;
2054 
2055     if ((sb > 0) && (ur <= ua)) {
2056       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2057     } else if ((sb < 0) && (ur >= ua)) {
2058       dst.SetUint(vform, i, 0);  // Negative saturation.
2059     } else {
2060       dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2061     }
2062   }
2063   return dst;
2064 }
2065 
2066 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067 LogicVRegister Simulator::abs(VectorFormat vform,
2068                               LogicVRegister dst,
2069                               const LogicVRegister& src) {
2070   dst.ClearForWrite(vform);
2071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072     // Test for signed saturation.
2073     int64_t sa = src.Int(vform, i);
2074     if (sa == MinIntFromFormat(vform)) {
2075       dst.SetSignedSat(i, true);
2076     }
2077     if (sa < 0) {
2078       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079     } else {
2080       dst.SetInt(vform, i, sa);
2081     }
2082   }
2083   return dst;
2084 }
2085 
2086 
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2087 LogicVRegister Simulator::andv(VectorFormat vform,
2088                                LogicVRegister dst,
2089                                const LogicPRegister& pg,
2090                                const LogicVRegister& src) {
2091   VIXL_ASSERT(IsSVEFormat(vform));
2092   uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2093   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094     if (!pg.IsActive(vform, i)) continue;
2095 
2096     result &= src.Uint(vform, i);
2097   }
2098   VectorFormat vform_dst =
2099       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2100   dst.ClearForWrite(vform_dst);
2101   dst.SetUint(vform_dst, 0, result);
2102   return dst;
2103 }
2104 
2105 
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2106 LogicVRegister Simulator::eorv(VectorFormat vform,
2107                                LogicVRegister dst,
2108                                const LogicPRegister& pg,
2109                                const LogicVRegister& src) {
2110   VIXL_ASSERT(IsSVEFormat(vform));
2111   uint64_t result = 0;
2112   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2113     if (!pg.IsActive(vform, i)) continue;
2114 
2115     result ^= src.Uint(vform, i);
2116   }
2117   VectorFormat vform_dst =
2118       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2119   dst.ClearForWrite(vform_dst);
2120   dst.SetUint(vform_dst, 0, result);
2121   return dst;
2122 }
2123 
2124 
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2125 LogicVRegister Simulator::orv(VectorFormat vform,
2126                               LogicVRegister dst,
2127                               const LogicPRegister& pg,
2128                               const LogicVRegister& src) {
2129   VIXL_ASSERT(IsSVEFormat(vform));
2130   uint64_t result = 0;
2131   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2132     if (!pg.IsActive(vform, i)) continue;
2133 
2134     result |= src.Uint(vform, i);
2135   }
2136   VectorFormat vform_dst =
2137       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2138   dst.ClearForWrite(vform_dst);
2139   dst.SetUint(vform_dst, 0, result);
2140   return dst;
2141 }
2142 
2143 
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2144 LogicVRegister Simulator::saddv(VectorFormat vform,
2145                                 LogicVRegister dst,
2146                                 const LogicPRegister& pg,
2147                                 const LogicVRegister& src) {
2148   VIXL_ASSERT(IsSVEFormat(vform));
2149   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2150   int64_t result = 0;
2151   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2152     if (!pg.IsActive(vform, i)) continue;
2153 
2154     // The destination register always has D-lane sizes and the source register
2155     // always has S-lanes or smaller, so signed integer overflow -- undefined
2156     // behaviour -- can't occur.
2157     result += src.Int(vform, i);
2158   }
2159 
2160   dst.ClearForWrite(kFormatD);
2161   dst.SetInt(kFormatD, 0, result);
2162   return dst;
2163 }
2164 
2165 
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2166 LogicVRegister Simulator::uaddv(VectorFormat vform,
2167                                 LogicVRegister dst,
2168                                 const LogicPRegister& pg,
2169                                 const LogicVRegister& src) {
2170   VIXL_ASSERT(IsSVEFormat(vform));
2171   uint64_t result = 0;
2172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173     if (!pg.IsActive(vform, i)) continue;
2174 
2175     result += src.Uint(vform, i);
2176   }
2177 
2178   dst.ClearForWrite(kFormatD);
2179   dst.SetUint(kFormatD, 0, result);
2180   return dst;
2181 }
2182 
2183 
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2184 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2185                                         LogicVRegister dst,
2186                                         bool dst_is_signed,
2187                                         const LogicVRegister& src,
2188                                         bool src_is_signed) {
2189   bool upperhalf = false;
2190   VectorFormat srcform = dstform;
2191   if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2192       (dstform == kFormat4S)) {
2193     upperhalf = true;
2194     srcform = VectorFormatHalfLanes(srcform);
2195   }
2196   srcform = VectorFormatDoubleWidth(srcform);
2197 
2198   LogicVRegister src_copy = src;
2199 
2200   int offset;
2201   if (upperhalf) {
2202     offset = LaneCountFromFormat(dstform) / 2;
2203   } else {
2204     offset = 0;
2205     dst.ClearForWrite(dstform);
2206   }
2207 
2208   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2209     int64_t ssrc = src_copy.Int(srcform, i);
2210     uint64_t usrc = src_copy.Uint(srcform, i);
2211 
2212     // Test for signed saturation
2213     if (ssrc > MaxIntFromFormat(dstform)) {
2214       dst.SetSignedSat(offset + i, true);
2215     } else if (ssrc < MinIntFromFormat(dstform)) {
2216       dst.SetSignedSat(offset + i, false);
2217     }
2218 
2219     // Test for unsigned saturation
2220     if (src_is_signed) {
2221       if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2222         dst.SetUnsignedSat(offset + i, true);
2223       } else if (ssrc < 0) {
2224         dst.SetUnsignedSat(offset + i, false);
2225       }
2226     } else {
2227       if (usrc > MaxUintFromFormat(dstform)) {
2228         dst.SetUnsignedSat(offset + i, true);
2229       }
2230     }
2231 
2232     int64_t result;
2233     if (src_is_signed) {
2234       result = ssrc & MaxUintFromFormat(dstform);
2235     } else {
2236       result = usrc & MaxUintFromFormat(dstform);
2237     }
2238 
2239     if (dst_is_signed) {
2240       dst.SetInt(dstform, offset + i, result);
2241     } else {
2242       dst.SetUint(dstform, offset + i, result);
2243     }
2244   }
2245   return dst;
2246 }
2247 
2248 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249 LogicVRegister Simulator::xtn(VectorFormat vform,
2250                               LogicVRegister dst,
2251                               const LogicVRegister& src) {
2252   return extractnarrow(vform, dst, true, src, true);
2253 }
2254 
2255 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2257                                 LogicVRegister dst,
2258                                 const LogicVRegister& src) {
2259   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2260 }
2261 
2262 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2263 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2264                                  LogicVRegister dst,
2265                                  const LogicVRegister& src) {
2266   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2267 }
2268 
2269 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2270 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2271                                 LogicVRegister dst,
2272                                 const LogicVRegister& src) {
2273   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2274 }
2275 
2276 
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2277 LogicVRegister Simulator::absdiff(VectorFormat vform,
2278                                   LogicVRegister dst,
2279                                   const LogicVRegister& src1,
2280                                   const LogicVRegister& src2,
2281                                   bool is_signed) {
2282   dst.ClearForWrite(vform);
2283   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2284     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2285                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
2286     // Always calculate the answer using unsigned arithmetic, to avoid
2287     // implemenation-defined signed overflow.
2288     if (src1_gt_src2) {
2289       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2290     } else {
2291       dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2292     }
2293   }
2294   return dst;
2295 }
2296 
2297 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2298 LogicVRegister Simulator::saba(VectorFormat vform,
2299                                LogicVRegister dst,
2300                                const LogicVRegister& src1,
2301                                const LogicVRegister& src2) {
2302   SimVRegister temp;
2303   dst.ClearForWrite(vform);
2304   absdiff(vform, temp, src1, src2, true);
2305   add(vform, dst, dst, temp);
2306   return dst;
2307 }
2308 
2309 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2310 LogicVRegister Simulator::uaba(VectorFormat vform,
2311                                LogicVRegister dst,
2312                                const LogicVRegister& src1,
2313                                const LogicVRegister& src2) {
2314   SimVRegister temp;
2315   dst.ClearForWrite(vform);
2316   absdiff(vform, temp, src1, src2, false);
2317   add(vform, dst, dst, temp);
2318   return dst;
2319 }
2320 
2321 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2322 LogicVRegister Simulator::not_(VectorFormat vform,
2323                                LogicVRegister dst,
2324                                const LogicVRegister& src) {
2325   dst.ClearForWrite(vform);
2326   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327     dst.SetUint(vform, i, ~src.Uint(vform, i));
2328   }
2329   return dst;
2330 }
2331 
2332 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2333 LogicVRegister Simulator::rbit(VectorFormat vform,
2334                                LogicVRegister dst,
2335                                const LogicVRegister& src) {
2336   uint64_t result[kZRegMaxSizeInBytes];
2337   int lane_count = LaneCountFromFormat(vform);
2338   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2339   uint64_t reversed_value;
2340   uint64_t value;
2341   for (int i = 0; i < lane_count; i++) {
2342     value = src.Uint(vform, i);
2343     reversed_value = 0;
2344     for (int j = 0; j < lane_size_in_bits; j++) {
2345       reversed_value = (reversed_value << 1) | (value & 1);
2346       value >>= 1;
2347     }
2348     result[i] = reversed_value;
2349   }
2350 
2351   dst.ClearForWrite(vform);
2352   for (int i = 0; i < lane_count; ++i) {
2353     dst.SetUint(vform, i, result[i]);
2354   }
2355   return dst;
2356 }
2357 
2358 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2359 LogicVRegister Simulator::rev(VectorFormat vform,
2360                               LogicVRegister dst,
2361                               const LogicVRegister& src) {
2362   VIXL_ASSERT(IsSVEFormat(vform));
2363   int lane_count = LaneCountFromFormat(vform);
2364   for (int i = 0; i < lane_count / 2; i++) {
2365     uint64_t t = src.Uint(vform, i);
2366     dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2367     dst.SetUint(vform, lane_count - i - 1, t);
2368   }
2369   return dst;
2370 }
2371 
2372 
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2373 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2374                                    LogicVRegister dst,
2375                                    const LogicVRegister& src,
2376                                    int rev_size) {
2377   uint64_t result[kZRegMaxSizeInBytes] = {};
2378   int lane_count = LaneCountFromFormat(vform);
2379   int lane_size = LaneSizeInBytesFromFormat(vform);
2380   int lanes_per_loop = rev_size / lane_size;
2381   for (int i = 0; i < lane_count; i += lanes_per_loop) {
2382     for (int j = 0; j < lanes_per_loop; j++) {
2383       result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2384     }
2385   }
2386   dst.ClearForWrite(vform);
2387   for (int i = 0; i < lane_count; ++i) {
2388     dst.SetUint(vform, i, result[i]);
2389   }
2390   return dst;
2391 }
2392 
2393 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394 LogicVRegister Simulator::rev16(VectorFormat vform,
2395                                 LogicVRegister dst,
2396                                 const LogicVRegister& src) {
2397   return rev_byte(vform, dst, src, 2);
2398 }
2399 
2400 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2401 LogicVRegister Simulator::rev32(VectorFormat vform,
2402                                 LogicVRegister dst,
2403                                 const LogicVRegister& src) {
2404   return rev_byte(vform, dst, src, 4);
2405 }
2406 
2407 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2408 LogicVRegister Simulator::rev64(VectorFormat vform,
2409                                 LogicVRegister dst,
2410                                 const LogicVRegister& src) {
2411   return rev_byte(vform, dst, src, 8);
2412 }
2413 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2414 LogicVRegister Simulator::addlp(VectorFormat vform,
2415                                 LogicVRegister dst,
2416                                 const LogicVRegister& src,
2417                                 bool is_signed,
2418                                 bool do_accumulate) {
2419   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2420   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2421 
2422   uint64_t result[kZRegMaxSizeInBytes];
2423   int lane_count = LaneCountFromFormat(vform);
2424   for (int i = 0; i < lane_count; i++) {
2425     if (is_signed) {
2426       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2427                                         src.Int(vformsrc, 2 * i + 1));
2428     } else {
2429       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2430     }
2431   }
2432 
2433   dst.ClearForWrite(vform);
2434   for (int i = 0; i < lane_count; ++i) {
2435     if (do_accumulate) {
2436       result[i] += dst.Uint(vform, i);
2437     }
2438     dst.SetUint(vform, i, result[i]);
2439   }
2440 
2441   return dst;
2442 }
2443 
2444 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2445 LogicVRegister Simulator::saddlp(VectorFormat vform,
2446                                  LogicVRegister dst,
2447                                  const LogicVRegister& src) {
2448   return addlp(vform, dst, src, true, false);
2449 }
2450 
2451 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2452 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2453                                  LogicVRegister dst,
2454                                  const LogicVRegister& src) {
2455   return addlp(vform, dst, src, false, false);
2456 }
2457 
2458 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2459 LogicVRegister Simulator::sadalp(VectorFormat vform,
2460                                  LogicVRegister dst,
2461                                  const LogicVRegister& src) {
2462   return addlp(vform, dst, src, true, true);
2463 }
2464 
2465 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466 LogicVRegister Simulator::uadalp(VectorFormat vform,
2467                                  LogicVRegister dst,
2468                                  const LogicVRegister& src) {
2469   return addlp(vform, dst, src, false, true);
2470 }
2471 
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2472 LogicVRegister Simulator::ror(VectorFormat vform,
2473                               LogicVRegister dst,
2474                               const LogicVRegister& src,
2475                               int rotation) {
2476   int width = LaneSizeInBitsFromFormat(vform);
2477   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2478     uint64_t value = src.Uint(vform, i);
2479     dst.SetUint(vform, i, RotateRight(value, rotation, width));
2480   }
2481   return dst;
2482 }
2483 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2484 LogicVRegister Simulator::ext(VectorFormat vform,
2485                               LogicVRegister dst,
2486                               const LogicVRegister& src1,
2487                               const LogicVRegister& src2,
2488                               int index) {
2489   uint8_t result[kZRegMaxSizeInBytes] = {};
2490   int lane_count = LaneCountFromFormat(vform);
2491   for (int i = 0; i < lane_count - index; ++i) {
2492     result[i] = src1.Uint(vform, i + index);
2493   }
2494   for (int i = 0; i < index; ++i) {
2495     result[lane_count - index + i] = src2.Uint(vform, i);
2496   }
2497   dst.ClearForWrite(vform);
2498   for (int i = 0; i < lane_count; ++i) {
2499     dst.SetUint(vform, i, result[i]);
2500   }
2501   return dst;
2502 }
2503 
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2504 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2505                                                 LogicVRegister dst,
2506                                                 const LogicVRegister& src,
2507                                                 int index) {
2508   if (index < 0) index += LaneCountFromFormat(vform);
2509   VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2510   index *= LaneSizeInBytesFromFormat(vform);
2511   return ext(kFormatVnB, dst, src, src, index);
2512 }
2513 
2514 
2515 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2516 LogicVRegister Simulator::fadda(VectorFormat vform,
2517                                 LogicVRegister acc,
2518                                 const LogicPRegister& pg,
2519                                 const LogicVRegister& src) {
2520   T result = acc.Float<T>(0);
2521   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2522     if (!pg.IsActive(vform, i)) continue;
2523 
2524     result = FPAdd(result, src.Float<T>(i));
2525   }
2526   VectorFormat vform_dst =
2527       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2528   acc.ClearForWrite(vform_dst);
2529   acc.SetFloat(0, result);
2530   return acc;
2531 }
2532 
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2533 LogicVRegister Simulator::fadda(VectorFormat vform,
2534                                 LogicVRegister acc,
2535                                 const LogicPRegister& pg,
2536                                 const LogicVRegister& src) {
2537   switch (LaneSizeInBitsFromFormat(vform)) {
2538     case kHRegSize:
2539       fadda<SimFloat16>(vform, acc, pg, src);
2540       break;
2541     case kSRegSize:
2542       fadda<float>(vform, acc, pg, src);
2543       break;
2544     case kDRegSize:
2545       fadda<double>(vform, acc, pg, src);
2546       break;
2547     default:
2548       VIXL_UNREACHABLE();
2549   }
2550   return acc;
2551 }
2552 
2553 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2554 LogicVRegister Simulator::fcadd(VectorFormat vform,
2555                                 LogicVRegister dst,          // d
2556                                 const LogicVRegister& src1,  // n
2557                                 const LogicVRegister& src2,  // m
2558                                 int rot) {
2559   int elements = LaneCountFromFormat(vform);
2560 
2561   T element1, element3;
2562   rot = (rot == 1) ? 270 : 90;
2563 
2564   // Loop example:
2565   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2566   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2567 
2568   for (int e = 0; e <= (elements / 2) - 1; e++) {
2569     switch (rot) {
2570       case 90:
2571         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2572         element3 = src2.Float<T>(e * 2);
2573         break;
2574       case 270:
2575         element1 = src2.Float<T>(e * 2 + 1);
2576         element3 = FPNeg(src2.Float<T>(e * 2));
2577         break;
2578       default:
2579         VIXL_UNREACHABLE();
2580         return dst;  // prevents "element(n) may be unintialized" errors
2581     }
2582     dst.ClearForWrite(vform);
2583     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2584     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2585   }
2586   return dst;
2587 }
2588 
2589 
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2590 LogicVRegister Simulator::fcadd(VectorFormat vform,
2591                                 LogicVRegister dst,          // d
2592                                 const LogicVRegister& src1,  // n
2593                                 const LogicVRegister& src2,  // m
2594                                 int rot) {
2595   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2596     fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2597   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2598     fcadd<float>(vform, dst, src1, src2, rot);
2599   } else {
2600     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2601     fcadd<double>(vform, dst, src1, src2, rot);
2602   }
2603   return dst;
2604 }
2605 
2606 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2607 LogicVRegister Simulator::fcmla(VectorFormat vform,
2608                                 LogicVRegister dst,
2609                                 const LogicVRegister& src1,
2610                                 const LogicVRegister& src2,
2611                                 const LogicVRegister& acc,
2612                                 int index,
2613                                 int rot) {
2614   int elements = LaneCountFromFormat(vform);
2615 
2616   T element1, element2, element3, element4;
2617   rot *= 90;
2618 
2619   // Loop example:
2620   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2621   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2622 
2623   for (int e = 0; e <= (elements / 2) - 1; e++) {
2624     // Index == -1 indicates a vector/vector rather than vector/indexed-element
2625     // operation.
2626     int f = (index < 0) ? e : index;
2627 
2628     switch (rot) {
2629       case 0:
2630         element1 = src2.Float<T>(f * 2);
2631         element2 = src1.Float<T>(e * 2);
2632         element3 = src2.Float<T>(f * 2 + 1);
2633         element4 = src1.Float<T>(e * 2);
2634         break;
2635       case 90:
2636         element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2637         element2 = src1.Float<T>(e * 2 + 1);
2638         element3 = src2.Float<T>(f * 2);
2639         element4 = src1.Float<T>(e * 2 + 1);
2640         break;
2641       case 180:
2642         element1 = FPNeg(src2.Float<T>(f * 2));
2643         element2 = src1.Float<T>(e * 2);
2644         element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2645         element4 = src1.Float<T>(e * 2);
2646         break;
2647       case 270:
2648         element1 = src2.Float<T>(f * 2 + 1);
2649         element2 = src1.Float<T>(e * 2 + 1);
2650         element3 = FPNeg(src2.Float<T>(f * 2));
2651         element4 = src1.Float<T>(e * 2 + 1);
2652         break;
2653       default:
2654         VIXL_UNREACHABLE();
2655         return dst;  // prevents "element(n) may be unintialized" errors
2656     }
2657     dst.ClearForWrite(vform);
2658     dst.SetFloat<T>(vform,
2659                     e * 2,
2660                     FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2661     dst.SetFloat<T>(vform,
2662                     e * 2 + 1,
2663                     FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2664   }
2665   return dst;
2666 }
2667 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2668 LogicVRegister Simulator::fcmla(VectorFormat vform,
2669                                 LogicVRegister dst,
2670                                 const LogicVRegister& src1,
2671                                 const LogicVRegister& src2,
2672                                 const LogicVRegister& acc,
2673                                 int rot) {
2674   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2675     fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2676   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2677     fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2678   } else {
2679     fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2680   }
2681   return dst;
2682 }
2683 
2684 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2685 LogicVRegister Simulator::fcmla(VectorFormat vform,
2686                                 LogicVRegister dst,          // d
2687                                 const LogicVRegister& src1,  // n
2688                                 const LogicVRegister& src2,  // m
2689                                 int index,
2690                                 int rot) {
2691   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2692     VIXL_UNIMPLEMENTED();
2693   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2694     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2695   } else {
2696     fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2697   }
2698   return dst;
2699 }
2700 
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2701 LogicVRegister Simulator::cadd(VectorFormat vform,
2702                                LogicVRegister dst,
2703                                const LogicVRegister& src1,
2704                                const LogicVRegister& src2,
2705                                int rot,
2706                                bool saturate) {
2707   SimVRegister src1_r, src1_i;
2708   SimVRegister src2_r, src2_i;
2709   SimVRegister zero;
2710   zero.Clear();
2711   uzp1(vform, src1_r, src1, zero);
2712   uzp2(vform, src1_i, src1, zero);
2713   uzp1(vform, src2_r, src2, zero);
2714   uzp2(vform, src2_i, src2, zero);
2715 
2716   if (rot == 90) {
2717     if (saturate) {
2718       sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2719       add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2720     } else {
2721       sub(vform, src1_r, src1_r, src2_i);
2722       add(vform, src1_i, src1_i, src2_r);
2723     }
2724   } else {
2725     VIXL_ASSERT(rot == 270);
2726     if (saturate) {
2727       add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2728       sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2729     } else {
2730       add(vform, src1_r, src1_r, src2_i);
2731       sub(vform, src1_i, src1_i, src2_r);
2732     }
2733   }
2734 
2735   zip1(vform, dst, src1_r, src1_i);
2736   return dst;
2737 }
2738 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2739 LogicVRegister Simulator::cmla(VectorFormat vform,
2740                                LogicVRegister dst,
2741                                const LogicVRegister& srca,
2742                                const LogicVRegister& src1,
2743                                const LogicVRegister& src2,
2744                                int rot) {
2745   SimVRegister src1_a;
2746   SimVRegister src2_a, src2_b;
2747   SimVRegister srca_i, srca_r;
2748   SimVRegister zero, temp;
2749   zero.Clear();
2750 
2751   if ((rot == 0) || (rot == 180)) {
2752     uzp1(vform, src1_a, src1, zero);
2753     uzp1(vform, src2_a, src2, zero);
2754     uzp2(vform, src2_b, src2, zero);
2755   } else {
2756     uzp2(vform, src1_a, src1, zero);
2757     uzp2(vform, src2_a, src2, zero);
2758     uzp1(vform, src2_b, src2, zero);
2759   }
2760 
2761   uzp1(vform, srca_r, srca, zero);
2762   uzp2(vform, srca_i, srca, zero);
2763 
2764   bool sub_r = (rot == 90) || (rot == 180);
2765   bool sub_i = (rot == 180) || (rot == 270);
2766 
2767   mul(vform, temp, src1_a, src2_a);
2768   if (sub_r) {
2769     sub(vform, srca_r, srca_r, temp);
2770   } else {
2771     add(vform, srca_r, srca_r, temp);
2772   }
2773 
2774   mul(vform, temp, src1_a, src2_b);
2775   if (sub_i) {
2776     sub(vform, srca_i, srca_i, temp);
2777   } else {
2778     add(vform, srca_i, srca_i, temp);
2779   }
2780 
2781   zip1(vform, dst, srca_r, srca_i);
2782   return dst;
2783 }
2784 
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2785 LogicVRegister Simulator::cmla(VectorFormat vform,
2786                                LogicVRegister dst,
2787                                const LogicVRegister& srca,
2788                                const LogicVRegister& src1,
2789                                const LogicVRegister& src2,
2790                                int index,
2791                                int rot) {
2792   SimVRegister temp;
2793   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2794   return cmla(vform, dst, srca, src1, temp, rot);
2795 }
2796 
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2797 LogicVRegister Simulator::bgrp(VectorFormat vform,
2798                                LogicVRegister dst,
2799                                const LogicVRegister& src1,
2800                                const LogicVRegister& src2,
2801                                bool do_bext) {
2802   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2803     uint64_t value = src1.Uint(vform, i);
2804     uint64_t mask = src2.Uint(vform, i);
2805     int high_pos = 0;
2806     int low_pos = 0;
2807     uint64_t result_high = 0;
2808     uint64_t result_low = 0;
2809     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2810       if ((mask & 1) == 0) {
2811         result_high |= (value & 1) << high_pos;
2812         high_pos++;
2813       } else {
2814         result_low |= (value & 1) << low_pos;
2815         low_pos++;
2816       }
2817       mask >>= 1;
2818       value >>= 1;
2819     }
2820 
2821     if (!do_bext) {
2822       result_low |= result_high << low_pos;
2823     }
2824 
2825     dst.SetUint(vform, i, result_low);
2826   }
2827   return dst;
2828 }
2829 
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830 LogicVRegister Simulator::bdep(VectorFormat vform,
2831                                LogicVRegister dst,
2832                                const LogicVRegister& src1,
2833                                const LogicVRegister& src2) {
2834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2835     uint64_t value = src1.Uint(vform, i);
2836     uint64_t mask = src2.Uint(vform, i);
2837     uint64_t result = 0;
2838     for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2839       if ((mask & 1) == 1) {
2840         result |= (value & 1) << j;
2841         value >>= 1;
2842       }
2843       mask >>= 1;
2844     }
2845     dst.SetUint(vform, i, result);
2846   }
2847   return dst;
2848 }
2849 
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2850 LogicVRegister Simulator::histogram(VectorFormat vform,
2851                                     LogicVRegister dst,
2852                                     const LogicPRegister& pg,
2853                                     const LogicVRegister& src1,
2854                                     const LogicVRegister& src2,
2855                                     bool do_segmented) {
2856   int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2857   uint64_t result[kZRegMaxSizeInBytes];
2858 
2859   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2860     uint64_t count = 0;
2861     uint64_t value = src1.Uint(vform, i);
2862 
2863     int segment = do_segmented ? (i / elements_per_segment) : 0;
2864     int segment_offset = segment * elements_per_segment;
2865     int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2866     for (int j = 0; j < hist_limit; j++) {
2867       if (pg.IsActive(vform, j) &&
2868           (value == src2.Uint(vform, j + segment_offset))) {
2869         count++;
2870       }
2871     }
2872     result[i] = count;
2873   }
2874   dst.SetUintArray(vform, result);
2875   return dst;
2876 }
2877 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2878 LogicVRegister Simulator::dup_element(VectorFormat vform,
2879                                       LogicVRegister dst,
2880                                       const LogicVRegister& src,
2881                                       int src_index) {
2882   if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2883     // When duplicating an element larger than 64 bits, split the element into
2884     // 64-bit parts, and duplicate the parts across the destination.
2885     uint64_t d[4];
2886     int count = (vform == kFormatVnQ) ? 2 : 4;
2887     for (int i = 0; i < count; i++) {
2888       d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2889     }
2890     dst.Clear();
2891     for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2892       dst.SetUint(kFormatVnD, i, d[i % count]);
2893     }
2894   } else {
2895     int lane_count = LaneCountFromFormat(vform);
2896     uint64_t value = src.Uint(vform, src_index);
2897     dst.ClearForWrite(vform);
2898     for (int i = 0; i < lane_count; ++i) {
2899       dst.SetUint(vform, i, value);
2900     }
2901   }
2902   return dst;
2903 }
2904 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2905 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2906                                                    LogicVRegister dst,
2907                                                    const LogicVRegister& src,
2908                                                    int src_index) {
2909   // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2910   // whereas in NEON, the size of segment is equal to the size of register
2911   // itself.
2912   int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2913   VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2914   int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2915 
2916   VIXL_ASSERT(src_index >= 0);
2917   VIXL_ASSERT(src_index < lanes_per_segment);
2918 
2919   dst.ClearForWrite(vform);
2920   for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2921     uint64_t value = src.Uint(vform, j + src_index);
2922     for (int i = 0; i < lanes_per_segment; i++) {
2923       dst.SetUint(vform, j + i, value);
2924     }
2925   }
2926   return dst;
2927 }
2928 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2929 LogicVRegister Simulator::dup_elements_to_segments(
2930     VectorFormat vform,
2931     LogicVRegister dst,
2932     const std::pair<int, int>& src_and_index) {
2933   return dup_elements_to_segments(vform,
2934                                   dst,
2935                                   ReadVRegister(src_and_index.first),
2936                                   src_and_index.second);
2937 }
2938 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2939 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2940                                         LogicVRegister dst,
2941                                         uint64_t imm) {
2942   int lane_count = LaneCountFromFormat(vform);
2943   uint64_t value = imm & MaxUintFromFormat(vform);
2944   dst.ClearForWrite(vform);
2945   for (int i = 0; i < lane_count; ++i) {
2946     dst.SetUint(vform, i, value);
2947   }
2948   return dst;
2949 }
2950 
2951 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2952 LogicVRegister Simulator::ins_element(VectorFormat vform,
2953                                       LogicVRegister dst,
2954                                       int dst_index,
2955                                       const LogicVRegister& src,
2956                                       int src_index) {
2957   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2958   return dst;
2959 }
2960 
2961 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2962 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2963                                         LogicVRegister dst,
2964                                         int dst_index,
2965                                         uint64_t imm) {
2966   uint64_t value = imm & MaxUintFromFormat(vform);
2967   dst.SetUint(vform, dst_index, value);
2968   return dst;
2969 }
2970 
2971 
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2972 LogicVRegister Simulator::index(VectorFormat vform,
2973                                 LogicVRegister dst,
2974                                 uint64_t start,
2975                                 uint64_t step) {
2976   VIXL_ASSERT(IsSVEFormat(vform));
2977   uint64_t value = start;
2978   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2979     dst.SetUint(vform, i, value);
2980     value += step;
2981   }
2982   return dst;
2983 }
2984 
2985 
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2986 LogicVRegister Simulator::insr(VectorFormat vform,
2987                                LogicVRegister dst,
2988                                uint64_t imm) {
2989   VIXL_ASSERT(IsSVEFormat(vform));
2990   for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2991     dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2992   }
2993   dst.SetUint(vform, 0, imm);
2994   return dst;
2995 }
2996 
2997 
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2998 LogicVRegister Simulator::mov(VectorFormat vform,
2999                               LogicVRegister dst,
3000                               const LogicVRegister& src) {
3001   dst.ClearForWrite(vform);
3002   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3003     dst.SetUint(vform, lane, src.Uint(vform, lane));
3004   }
3005   return dst;
3006 }
3007 
3008 
mov(LogicPRegister dst,const LogicPRegister & src)3009 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3010   // Avoid a copy if the registers already alias.
3011   if (dst.Aliases(src)) return dst;
3012 
3013   for (int i = 0; i < dst.GetChunkCount(); i++) {
3014     dst.SetChunk(i, src.GetChunk(i));
3015   }
3016   return dst;
3017 }
3018 
3019 
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3020 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3021                                       LogicVRegister dst,
3022                                       const SimPRegister& pg,
3023                                       const LogicVRegister& src) {
3024   return sel(vform, dst, pg, src, dst);
3025 }
3026 
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3027 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3028                                       LogicVRegister dst,
3029                                       const SimPRegister& pg,
3030                                       const LogicVRegister& src) {
3031   SimVRegister zero;
3032   dup_immediate(vform, zero, 0);
3033   return sel(vform, dst, pg, src, zero);
3034 }
3035 
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3036 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3037                                           LogicVRegister dst,
3038                                           const LogicVRegister& src,
3039                                           int start_at) {
3040   VIXL_ASSERT((start_at == 0) || (start_at == 1));
3041   for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3042     dst.SetUint(vform, i, src.Uint(vform, i));
3043   }
3044   return dst;
3045 }
3046 
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3047 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3048                                       const LogicPRegister& pg,
3049                                       const LogicPRegister& src) {
3050   return sel(dst, pg, src, dst);
3051 }
3052 
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3053 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3054                                       const LogicPRegister& pg,
3055                                       const LogicPRegister& src) {
3056   SimPRegister all_false;
3057   return sel(dst, pg, src, pfalse(all_false));
3058 }
3059 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3060 LogicVRegister Simulator::movi(VectorFormat vform,
3061                                LogicVRegister dst,
3062                                uint64_t imm) {
3063   int lane_count = LaneCountFromFormat(vform);
3064   dst.ClearForWrite(vform);
3065   for (int i = 0; i < lane_count; ++i) {
3066     dst.SetUint(vform, i, imm);
3067   }
3068   return dst;
3069 }
3070 
3071 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3072 LogicVRegister Simulator::mvni(VectorFormat vform,
3073                                LogicVRegister dst,
3074                                uint64_t imm) {
3075   int lane_count = LaneCountFromFormat(vform);
3076   dst.ClearForWrite(vform);
3077   for (int i = 0; i < lane_count; ++i) {
3078     dst.SetUint(vform, i, ~imm);
3079   }
3080   return dst;
3081 }
3082 
3083 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3084 LogicVRegister Simulator::orr(VectorFormat vform,
3085                               LogicVRegister dst,
3086                               const LogicVRegister& src,
3087                               uint64_t imm) {
3088   uint64_t result[16];
3089   int lane_count = LaneCountFromFormat(vform);
3090   for (int i = 0; i < lane_count; ++i) {
3091     result[i] = src.Uint(vform, i) | imm;
3092   }
3093   dst.ClearForWrite(vform);
3094   for (int i = 0; i < lane_count; ++i) {
3095     dst.SetUint(vform, i, result[i]);
3096   }
3097   return dst;
3098 }
3099 
3100 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3101 LogicVRegister Simulator::uxtl(VectorFormat vform,
3102                                LogicVRegister dst,
3103                                const LogicVRegister& src,
3104                                bool is_2) {
3105   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3106   int lane_count = LaneCountFromFormat(vform);
3107   int src_offset = is_2 ? lane_count : 0;
3108 
3109   dst.ClearForWrite(vform);
3110   for (int i = 0; i < lane_count; i++) {
3111     dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3112   }
3113   return dst;
3114 }
3115 
3116 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3117 LogicVRegister Simulator::sxtl(VectorFormat vform,
3118                                LogicVRegister dst,
3119                                const LogicVRegister& src,
3120                                bool is_2) {
3121   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122   int lane_count = LaneCountFromFormat(vform);
3123   int src_offset = is_2 ? lane_count : 0;
3124 
3125   dst.ClearForWrite(vform);
3126   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3127     dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3128   }
3129   return dst;
3130 }
3131 
3132 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3133 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3134                                 LogicVRegister dst,
3135                                 const LogicVRegister& src) {
3136   return uxtl(vform, dst, src, /* is_2 = */ true);
3137 }
3138 
3139 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3140 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3141                                 LogicVRegister dst,
3142                                 const LogicVRegister& src) {
3143   return sxtl(vform, dst, src, /* is_2 = */ true);
3144 }
3145 
3146 
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::uxt(VectorFormat vform,
3148                               LogicVRegister dst,
3149                               const LogicVRegister& src,
3150                               unsigned from_size_in_bits) {
3151   int lane_count = LaneCountFromFormat(vform);
3152   uint64_t mask = GetUintMask(from_size_in_bits);
3153 
3154   dst.ClearForWrite(vform);
3155   for (int i = 0; i < lane_count; i++) {
3156     dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3157   }
3158   return dst;
3159 }
3160 
3161 
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3162 LogicVRegister Simulator::sxt(VectorFormat vform,
3163                               LogicVRegister dst,
3164                               const LogicVRegister& src,
3165                               unsigned from_size_in_bits) {
3166   int lane_count = LaneCountFromFormat(vform);
3167 
3168   dst.ClearForWrite(vform);
3169   for (int i = 0; i < lane_count; i++) {
3170     uint64_t value =
3171         ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3172     dst.SetInt(vform, i, value);
3173   }
3174   return dst;
3175 }
3176 
3177 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3178 LogicVRegister Simulator::shrn(VectorFormat vform,
3179                                LogicVRegister dst,
3180                                const LogicVRegister& src,
3181                                int shift) {
3182   SimVRegister temp;
3183   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3184   VectorFormat vform_dst = vform;
3185   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3186   return extractnarrow(vform_dst, dst, false, shifted_src, false);
3187 }
3188 
3189 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3190 LogicVRegister Simulator::shrn2(VectorFormat vform,
3191                                 LogicVRegister dst,
3192                                 const LogicVRegister& src,
3193                                 int shift) {
3194   SimVRegister temp;
3195   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3196   VectorFormat vformdst = vform;
3197   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3198   return extractnarrow(vformdst, dst, false, shifted_src, false);
3199 }
3200 
3201 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3202 LogicVRegister Simulator::rshrn(VectorFormat vform,
3203                                 LogicVRegister dst,
3204                                 const LogicVRegister& src,
3205                                 int shift) {
3206   SimVRegister temp;
3207   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3208   VectorFormat vformdst = vform;
3209   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3210   return extractnarrow(vformdst, dst, false, shifted_src, false);
3211 }
3212 
3213 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3214 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3215                                  LogicVRegister dst,
3216                                  const LogicVRegister& src,
3217                                  int shift) {
3218   SimVRegister temp;
3219   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3220   VectorFormat vformdst = vform;
3221   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3222   return extractnarrow(vformdst, dst, false, shifted_src, false);
3223 }
3224 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3225 LogicVRegister Simulator::Table(VectorFormat vform,
3226                                 LogicVRegister dst,
3227                                 const LogicVRegister& ind,
3228                                 bool zero_out_of_bounds,
3229                                 const LogicVRegister* tab1,
3230                                 const LogicVRegister* tab2,
3231                                 const LogicVRegister* tab3,
3232                                 const LogicVRegister* tab4) {
3233   VIXL_ASSERT(tab1 != NULL);
3234   int lane_count = LaneCountFromFormat(vform);
3235   VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3236   uint64_t table[kZRegMaxSizeInBytes * 2];
3237   uint64_t result[kZRegMaxSizeInBytes];
3238 
3239   // For Neon, the table source registers are always 16B, and Neon allows only
3240   // 8B or 16B vform for the destination, so infer the table format from the
3241   // destination.
3242   VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3243 
3244   uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3245   if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3246   if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3247   if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3248 
3249   for (int i = 0; i < lane_count; i++) {
3250     uint64_t index = ind.Uint(vform, i);
3251     result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3252     if (index < tab_size) result[i] = table[index];
3253   }
3254   dst.SetUintArray(vform, result);
3255   return dst;
3256 }
3257 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3258 LogicVRegister Simulator::tbl(VectorFormat vform,
3259                               LogicVRegister dst,
3260                               const LogicVRegister& tab,
3261                               const LogicVRegister& ind) {
3262   return Table(vform, dst, ind, true, &tab);
3263 }
3264 
3265 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3266 LogicVRegister Simulator::tbl(VectorFormat vform,
3267                               LogicVRegister dst,
3268                               const LogicVRegister& tab,
3269                               const LogicVRegister& tab2,
3270                               const LogicVRegister& ind) {
3271   return Table(vform, dst, ind, true, &tab, &tab2);
3272 }
3273 
3274 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3275 LogicVRegister Simulator::tbl(VectorFormat vform,
3276                               LogicVRegister dst,
3277                               const LogicVRegister& tab,
3278                               const LogicVRegister& tab2,
3279                               const LogicVRegister& tab3,
3280                               const LogicVRegister& ind) {
3281   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3282 }
3283 
3284 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3285 LogicVRegister Simulator::tbl(VectorFormat vform,
3286                               LogicVRegister dst,
3287                               const LogicVRegister& tab,
3288                               const LogicVRegister& tab2,
3289                               const LogicVRegister& tab3,
3290                               const LogicVRegister& tab4,
3291                               const LogicVRegister& ind) {
3292   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3293 }
3294 
3295 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3296 LogicVRegister Simulator::tbx(VectorFormat vform,
3297                               LogicVRegister dst,
3298                               const LogicVRegister& tab,
3299                               const LogicVRegister& ind) {
3300   return Table(vform, dst, ind, false, &tab);
3301 }
3302 
3303 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3304 LogicVRegister Simulator::tbx(VectorFormat vform,
3305                               LogicVRegister dst,
3306                               const LogicVRegister& tab,
3307                               const LogicVRegister& tab2,
3308                               const LogicVRegister& ind) {
3309   return Table(vform, dst, ind, false, &tab, &tab2);
3310 }
3311 
3312 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3313 LogicVRegister Simulator::tbx(VectorFormat vform,
3314                               LogicVRegister dst,
3315                               const LogicVRegister& tab,
3316                               const LogicVRegister& tab2,
3317                               const LogicVRegister& tab3,
3318                               const LogicVRegister& ind) {
3319   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3320 }
3321 
3322 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3323 LogicVRegister Simulator::tbx(VectorFormat vform,
3324                               LogicVRegister dst,
3325                               const LogicVRegister& tab,
3326                               const LogicVRegister& tab2,
3327                               const LogicVRegister& tab3,
3328                               const LogicVRegister& tab4,
3329                               const LogicVRegister& ind) {
3330   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3331 }
3332 
3333 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3334 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3335                                  LogicVRegister dst,
3336                                  const LogicVRegister& src,
3337                                  int shift) {
3338   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3339 }
3340 
3341 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3342 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3343                                   LogicVRegister dst,
3344                                   const LogicVRegister& src,
3345                                   int shift) {
3346   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3347 }
3348 
3349 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3350 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3351                                   LogicVRegister dst,
3352                                   const LogicVRegister& src,
3353                                   int shift) {
3354   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3355 }
3356 
3357 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3358 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3359                                    LogicVRegister dst,
3360                                    const LogicVRegister& src,
3361                                    int shift) {
3362   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3363 }
3364 
3365 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3366 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3367                                  LogicVRegister dst,
3368                                  const LogicVRegister& src,
3369                                  int shift) {
3370   SimVRegister temp;
3371   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3372   VectorFormat vformdst = vform;
3373   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3374   return sqxtn(vformdst, dst, shifted_src);
3375 }
3376 
3377 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3378 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3379                                   LogicVRegister dst,
3380                                   const LogicVRegister& src,
3381                                   int shift) {
3382   SimVRegister temp;
3383   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3384   VectorFormat vformdst = vform;
3385   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3386   return sqxtn(vformdst, dst, shifted_src);
3387 }
3388 
3389 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3390 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3391                                   LogicVRegister dst,
3392                                   const LogicVRegister& src,
3393                                   int shift) {
3394   SimVRegister temp;
3395   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3396   VectorFormat vformdst = vform;
3397   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3398   return sqxtn(vformdst, dst, shifted_src);
3399 }
3400 
3401 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3402 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3403                                    LogicVRegister dst,
3404                                    const LogicVRegister& src,
3405                                    int shift) {
3406   SimVRegister temp;
3407   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3408   VectorFormat vformdst = vform;
3409   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3410   return sqxtn(vformdst, dst, shifted_src);
3411 }
3412 
3413 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3414 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3415                                   LogicVRegister dst,
3416                                   const LogicVRegister& src,
3417                                   int shift) {
3418   SimVRegister temp;
3419   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3420   VectorFormat vformdst = vform;
3421   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3422   return sqxtun(vformdst, dst, shifted_src);
3423 }
3424 
3425 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3426 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3427                                    LogicVRegister dst,
3428                                    const LogicVRegister& src,
3429                                    int shift) {
3430   SimVRegister temp;
3431   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3432   VectorFormat vformdst = vform;
3433   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3434   return sqxtun(vformdst, dst, shifted_src);
3435 }
3436 
3437 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3438 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3439                                    LogicVRegister dst,
3440                                    const LogicVRegister& src,
3441                                    int shift) {
3442   SimVRegister temp;
3443   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3444   VectorFormat vformdst = vform;
3445   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3446   return sqxtun(vformdst, dst, shifted_src);
3447 }
3448 
3449 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3450 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3451                                     LogicVRegister dst,
3452                                     const LogicVRegister& src,
3453                                     int shift) {
3454   SimVRegister temp;
3455   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3456   VectorFormat vformdst = vform;
3457   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3458   return sqxtun(vformdst, dst, shifted_src);
3459 }
3460 
3461 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3462 LogicVRegister Simulator::uaddl(VectorFormat vform,
3463                                 LogicVRegister dst,
3464                                 const LogicVRegister& src1,
3465                                 const LogicVRegister& src2) {
3466   SimVRegister temp1, temp2;
3467   uxtl(vform, temp1, src1);
3468   uxtl(vform, temp2, src2);
3469   add(vform, dst, temp1, temp2);
3470   return dst;
3471 }
3472 
3473 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3475                                  LogicVRegister dst,
3476                                  const LogicVRegister& src1,
3477                                  const LogicVRegister& src2) {
3478   SimVRegister temp1, temp2;
3479   uxtl2(vform, temp1, src1);
3480   uxtl2(vform, temp2, src2);
3481   add(vform, dst, temp1, temp2);
3482   return dst;
3483 }
3484 
3485 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486 LogicVRegister Simulator::uaddw(VectorFormat vform,
3487                                 LogicVRegister dst,
3488                                 const LogicVRegister& src1,
3489                                 const LogicVRegister& src2) {
3490   SimVRegister temp;
3491   uxtl(vform, temp, src2);
3492   add(vform, dst, src1, temp);
3493   return dst;
3494 }
3495 
3496 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3498                                  LogicVRegister dst,
3499                                  const LogicVRegister& src1,
3500                                  const LogicVRegister& src2) {
3501   SimVRegister temp;
3502   uxtl2(vform, temp, src2);
3503   add(vform, dst, src1, temp);
3504   return dst;
3505 }
3506 
3507 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508 LogicVRegister Simulator::saddl(VectorFormat vform,
3509                                 LogicVRegister dst,
3510                                 const LogicVRegister& src1,
3511                                 const LogicVRegister& src2) {
3512   SimVRegister temp1, temp2;
3513   sxtl(vform, temp1, src1);
3514   sxtl(vform, temp2, src2);
3515   add(vform, dst, temp1, temp2);
3516   return dst;
3517 }
3518 
3519 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3520 LogicVRegister Simulator::saddl2(VectorFormat vform,
3521                                  LogicVRegister dst,
3522                                  const LogicVRegister& src1,
3523                                  const LogicVRegister& src2) {
3524   SimVRegister temp1, temp2;
3525   sxtl2(vform, temp1, src1);
3526   sxtl2(vform, temp2, src2);
3527   add(vform, dst, temp1, temp2);
3528   return dst;
3529 }
3530 
3531 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3532 LogicVRegister Simulator::saddw(VectorFormat vform,
3533                                 LogicVRegister dst,
3534                                 const LogicVRegister& src1,
3535                                 const LogicVRegister& src2) {
3536   SimVRegister temp;
3537   sxtl(vform, temp, src2);
3538   add(vform, dst, src1, temp);
3539   return dst;
3540 }
3541 
3542 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3543 LogicVRegister Simulator::saddw2(VectorFormat vform,
3544                                  LogicVRegister dst,
3545                                  const LogicVRegister& src1,
3546                                  const LogicVRegister& src2) {
3547   SimVRegister temp;
3548   sxtl2(vform, temp, src2);
3549   add(vform, dst, src1, temp);
3550   return dst;
3551 }
3552 
3553 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3554 LogicVRegister Simulator::usubl(VectorFormat vform,
3555                                 LogicVRegister dst,
3556                                 const LogicVRegister& src1,
3557                                 const LogicVRegister& src2) {
3558   SimVRegister temp1, temp2;
3559   uxtl(vform, temp1, src1);
3560   uxtl(vform, temp2, src2);
3561   sub(vform, dst, temp1, temp2);
3562   return dst;
3563 }
3564 
3565 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3566 LogicVRegister Simulator::usubl2(VectorFormat vform,
3567                                  LogicVRegister dst,
3568                                  const LogicVRegister& src1,
3569                                  const LogicVRegister& src2) {
3570   SimVRegister temp1, temp2;
3571   uxtl2(vform, temp1, src1);
3572   uxtl2(vform, temp2, src2);
3573   sub(vform, dst, temp1, temp2);
3574   return dst;
3575 }
3576 
3577 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3578 LogicVRegister Simulator::usubw(VectorFormat vform,
3579                                 LogicVRegister dst,
3580                                 const LogicVRegister& src1,
3581                                 const LogicVRegister& src2) {
3582   SimVRegister temp;
3583   uxtl(vform, temp, src2);
3584   sub(vform, dst, src1, temp);
3585   return dst;
3586 }
3587 
3588 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3589 LogicVRegister Simulator::usubw2(VectorFormat vform,
3590                                  LogicVRegister dst,
3591                                  const LogicVRegister& src1,
3592                                  const LogicVRegister& src2) {
3593   SimVRegister temp;
3594   uxtl2(vform, temp, src2);
3595   sub(vform, dst, src1, temp);
3596   return dst;
3597 }
3598 
3599 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3600 LogicVRegister Simulator::ssubl(VectorFormat vform,
3601                                 LogicVRegister dst,
3602                                 const LogicVRegister& src1,
3603                                 const LogicVRegister& src2) {
3604   SimVRegister temp1, temp2;
3605   sxtl(vform, temp1, src1);
3606   sxtl(vform, temp2, src2);
3607   sub(vform, dst, temp1, temp2);
3608   return dst;
3609 }
3610 
3611 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3612 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3613                                  LogicVRegister dst,
3614                                  const LogicVRegister& src1,
3615                                  const LogicVRegister& src2) {
3616   SimVRegister temp1, temp2;
3617   sxtl2(vform, temp1, src1);
3618   sxtl2(vform, temp2, src2);
3619   sub(vform, dst, temp1, temp2);
3620   return dst;
3621 }
3622 
3623 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3624 LogicVRegister Simulator::ssubw(VectorFormat vform,
3625                                 LogicVRegister dst,
3626                                 const LogicVRegister& src1,
3627                                 const LogicVRegister& src2) {
3628   SimVRegister temp;
3629   sxtl(vform, temp, src2);
3630   sub(vform, dst, src1, temp);
3631   return dst;
3632 }
3633 
3634 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3635 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3636                                  LogicVRegister dst,
3637                                  const LogicVRegister& src1,
3638                                  const LogicVRegister& src2) {
3639   SimVRegister temp;
3640   sxtl2(vform, temp, src2);
3641   sub(vform, dst, src1, temp);
3642   return dst;
3643 }
3644 
3645 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3646 LogicVRegister Simulator::uabal(VectorFormat vform,
3647                                 LogicVRegister dst,
3648                                 const LogicVRegister& src1,
3649                                 const LogicVRegister& src2) {
3650   SimVRegister temp1, temp2;
3651   uxtl(vform, temp1, src1);
3652   uxtl(vform, temp2, src2);
3653   uaba(vform, dst, temp1, temp2);
3654   return dst;
3655 }
3656 
3657 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3658 LogicVRegister Simulator::uabal2(VectorFormat vform,
3659                                  LogicVRegister dst,
3660                                  const LogicVRegister& src1,
3661                                  const LogicVRegister& src2) {
3662   SimVRegister temp1, temp2;
3663   uxtl2(vform, temp1, src1);
3664   uxtl2(vform, temp2, src2);
3665   uaba(vform, dst, temp1, temp2);
3666   return dst;
3667 }
3668 
3669 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3670 LogicVRegister Simulator::sabal(VectorFormat vform,
3671                                 LogicVRegister dst,
3672                                 const LogicVRegister& src1,
3673                                 const LogicVRegister& src2) {
3674   SimVRegister temp1, temp2;
3675   sxtl(vform, temp1, src1);
3676   sxtl(vform, temp2, src2);
3677   saba(vform, dst, temp1, temp2);
3678   return dst;
3679 }
3680 
3681 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3682 LogicVRegister Simulator::sabal2(VectorFormat vform,
3683                                  LogicVRegister dst,
3684                                  const LogicVRegister& src1,
3685                                  const LogicVRegister& src2) {
3686   SimVRegister temp1, temp2;
3687   sxtl2(vform, temp1, src1);
3688   sxtl2(vform, temp2, src2);
3689   saba(vform, dst, temp1, temp2);
3690   return dst;
3691 }
3692 
3693 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3694 LogicVRegister Simulator::uabdl(VectorFormat vform,
3695                                 LogicVRegister dst,
3696                                 const LogicVRegister& src1,
3697                                 const LogicVRegister& src2) {
3698   SimVRegister temp1, temp2;
3699   uxtl(vform, temp1, src1);
3700   uxtl(vform, temp2, src2);
3701   absdiff(vform, dst, temp1, temp2, false);
3702   return dst;
3703 }
3704 
3705 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3706 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3707                                  LogicVRegister dst,
3708                                  const LogicVRegister& src1,
3709                                  const LogicVRegister& src2) {
3710   SimVRegister temp1, temp2;
3711   uxtl2(vform, temp1, src1);
3712   uxtl2(vform, temp2, src2);
3713   absdiff(vform, dst, temp1, temp2, false);
3714   return dst;
3715 }
3716 
3717 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3718 LogicVRegister Simulator::sabdl(VectorFormat vform,
3719                                 LogicVRegister dst,
3720                                 const LogicVRegister& src1,
3721                                 const LogicVRegister& src2) {
3722   SimVRegister temp1, temp2;
3723   sxtl(vform, temp1, src1);
3724   sxtl(vform, temp2, src2);
3725   absdiff(vform, dst, temp1, temp2, true);
3726   return dst;
3727 }
3728 
3729 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3730 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3731                                  LogicVRegister dst,
3732                                  const LogicVRegister& src1,
3733                                  const LogicVRegister& src2) {
3734   SimVRegister temp1, temp2;
3735   sxtl2(vform, temp1, src1);
3736   sxtl2(vform, temp2, src2);
3737   absdiff(vform, dst, temp1, temp2, true);
3738   return dst;
3739 }
3740 
3741 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3742 LogicVRegister Simulator::umull(VectorFormat vform,
3743                                 LogicVRegister dst,
3744                                 const LogicVRegister& src1,
3745                                 const LogicVRegister& src2,
3746                                 bool is_2) {
3747   SimVRegister temp1, temp2;
3748   uxtl(vform, temp1, src1, is_2);
3749   uxtl(vform, temp2, src2, is_2);
3750   mul(vform, dst, temp1, temp2);
3751   return dst;
3752 }
3753 
3754 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3755 LogicVRegister Simulator::umull2(VectorFormat vform,
3756                                  LogicVRegister dst,
3757                                  const LogicVRegister& src1,
3758                                  const LogicVRegister& src2) {
3759   return umull(vform, dst, src1, src2, /* is_2 = */ true);
3760 }
3761 
3762 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3763 LogicVRegister Simulator::smull(VectorFormat vform,
3764                                 LogicVRegister dst,
3765                                 const LogicVRegister& src1,
3766                                 const LogicVRegister& src2,
3767                                 bool is_2) {
3768   SimVRegister temp1, temp2;
3769   sxtl(vform, temp1, src1, is_2);
3770   sxtl(vform, temp2, src2, is_2);
3771   mul(vform, dst, temp1, temp2);
3772   return dst;
3773 }
3774 
3775 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3776 LogicVRegister Simulator::smull2(VectorFormat vform,
3777                                  LogicVRegister dst,
3778                                  const LogicVRegister& src1,
3779                                  const LogicVRegister& src2) {
3780   return smull(vform, dst, src1, src2, /* is_2 = */ true);
3781 }
3782 
3783 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3784 LogicVRegister Simulator::umlsl(VectorFormat vform,
3785                                 LogicVRegister dst,
3786                                 const LogicVRegister& src1,
3787                                 const LogicVRegister& src2,
3788                                 bool is_2) {
3789   SimVRegister temp1, temp2;
3790   uxtl(vform, temp1, src1, is_2);
3791   uxtl(vform, temp2, src2, is_2);
3792   mls(vform, dst, dst, temp1, temp2);
3793   return dst;
3794 }
3795 
3796 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3797 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3798                                  LogicVRegister dst,
3799                                  const LogicVRegister& src1,
3800                                  const LogicVRegister& src2) {
3801   return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3802 }
3803 
3804 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3805 LogicVRegister Simulator::smlsl(VectorFormat vform,
3806                                 LogicVRegister dst,
3807                                 const LogicVRegister& src1,
3808                                 const LogicVRegister& src2,
3809                                 bool is_2) {
3810   SimVRegister temp1, temp2;
3811   sxtl(vform, temp1, src1, is_2);
3812   sxtl(vform, temp2, src2, is_2);
3813   mls(vform, dst, dst, temp1, temp2);
3814   return dst;
3815 }
3816 
3817 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3818 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3819                                  LogicVRegister dst,
3820                                  const LogicVRegister& src1,
3821                                  const LogicVRegister& src2) {
3822   return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3823 }
3824 
3825 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3826 LogicVRegister Simulator::umlal(VectorFormat vform,
3827                                 LogicVRegister dst,
3828                                 const LogicVRegister& src1,
3829                                 const LogicVRegister& src2,
3830                                 bool is_2) {
3831   SimVRegister temp1, temp2;
3832   uxtl(vform, temp1, src1, is_2);
3833   uxtl(vform, temp2, src2, is_2);
3834   mla(vform, dst, dst, temp1, temp2);
3835   return dst;
3836 }
3837 
3838 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3839 LogicVRegister Simulator::umlal2(VectorFormat vform,
3840                                  LogicVRegister dst,
3841                                  const LogicVRegister& src1,
3842                                  const LogicVRegister& src2) {
3843   return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3844 }
3845 
3846 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3847 LogicVRegister Simulator::smlal(VectorFormat vform,
3848                                 LogicVRegister dst,
3849                                 const LogicVRegister& src1,
3850                                 const LogicVRegister& src2,
3851                                 bool is_2) {
3852   SimVRegister temp1, temp2;
3853   sxtl(vform, temp1, src1, is_2);
3854   sxtl(vform, temp2, src2, is_2);
3855   mla(vform, dst, dst, temp1, temp2);
3856   return dst;
3857 }
3858 
3859 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3860 LogicVRegister Simulator::smlal2(VectorFormat vform,
3861                                  LogicVRegister dst,
3862                                  const LogicVRegister& src1,
3863                                  const LogicVRegister& src2) {
3864   return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3865 }
3866 
3867 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3868 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3869                                   LogicVRegister dst,
3870                                   const LogicVRegister& src1,
3871                                   const LogicVRegister& src2,
3872                                   bool is_2) {
3873   SimVRegister temp;
3874   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3875   return add(vform, dst, dst, product).SignedSaturate(vform);
3876 }
3877 
3878 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3879 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3880                                    LogicVRegister dst,
3881                                    const LogicVRegister& src1,
3882                                    const LogicVRegister& src2) {
3883   return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3884 }
3885 
3886 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3887 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3888                                   LogicVRegister dst,
3889                                   const LogicVRegister& src1,
3890                                   const LogicVRegister& src2,
3891                                   bool is_2) {
3892   SimVRegister temp;
3893   LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3894   return sub(vform, dst, dst, product).SignedSaturate(vform);
3895 }
3896 
3897 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3898 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3899                                    LogicVRegister dst,
3900                                    const LogicVRegister& src1,
3901                                    const LogicVRegister& src2) {
3902   return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3903 }
3904 
3905 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3906 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3907                                   LogicVRegister dst,
3908                                   const LogicVRegister& src1,
3909                                   const LogicVRegister& src2,
3910                                   bool is_2) {
3911   SimVRegister temp;
3912   LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3913   return add(vform, dst, product, product).SignedSaturate(vform);
3914 }
3915 
3916 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3917 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3918                                    LogicVRegister dst,
3919                                    const LogicVRegister& src1,
3920                                    const LogicVRegister& src2) {
3921   return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3922 }
3923 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3924 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3925                                    LogicVRegister dst,
3926                                    const LogicVRegister& src1,
3927                                    const LogicVRegister& src2,
3928                                    bool round) {
3929   int esize = LaneSizeInBitsFromFormat(vform);
3930 
3931   SimVRegister temp_lo, temp_hi;
3932 
3933   // Compute low and high multiplication results.
3934   mul(vform, temp_lo, src1, src2);
3935   smulh(vform, temp_hi, src1, src2);
3936 
3937   // Double by shifting high half, and adding in most-significant bit of low
3938   // half.
3939   shl(vform, temp_hi, temp_hi, 1);
3940   usra(vform, temp_hi, temp_lo, esize - 1);
3941 
3942   if (round) {
3943     // Add the second (due to doubling) most-significant bit of the low half
3944     // into the result.
3945     shl(vform, temp_lo, temp_lo, 1);
3946     usra(vform, temp_hi, temp_lo, esize - 1);
3947   }
3948 
3949   SimPRegister not_sat;
3950   LogicPRegister ptemp(not_sat);
3951   dst.ClearForWrite(vform);
3952   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3953     // Saturation only occurs when src1 = src2 = minimum representable value.
3954     // Check this as a special case.
3955     ptemp.SetActive(vform, i, true);
3956     if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3957         (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3958       ptemp.SetActive(vform, i, false);
3959     }
3960     dst.SetInt(vform, i, MaxIntFromFormat(vform));
3961   }
3962 
3963   mov_merging(vform, dst, not_sat, temp_hi);
3964   return dst;
3965 }
3966 
3967 
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)3968 LogicVRegister Simulator::dot(VectorFormat vform,
3969                               LogicVRegister dst,
3970                               const LogicVRegister& src1,
3971                               const LogicVRegister& src2,
3972                               bool is_src1_signed,
3973                               bool is_src2_signed) {
3974   VectorFormat quarter_vform =
3975       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3976 
3977   dst.ClearForWrite(vform);
3978   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3979     uint64_t result = 0;
3980     int64_t element1, element2;
3981     for (int i = 0; i < 4; i++) {
3982       int index = 4 * e + i;
3983       if (is_src1_signed) {
3984         element1 = src1.Int(quarter_vform, index);
3985       } else {
3986         element1 = src1.Uint(quarter_vform, index);
3987       }
3988       if (is_src2_signed) {
3989         element2 = src2.Int(quarter_vform, index);
3990       } else {
3991         element2 = src2.Uint(quarter_vform, index);
3992       }
3993       result += element1 * element2;
3994     }
3995     dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996   }
3997   return dst;
3998 }
3999 
4000 
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002                                LogicVRegister dst,
4003                                const LogicVRegister& src1,
4004                                const LogicVRegister& src2) {
4005   return dot(vform, dst, src1, src2, true, true);
4006 }
4007 
4008 
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010                                LogicVRegister dst,
4011                                const LogicVRegister& src1,
4012                                const LogicVRegister& src2) {
4013   return dot(vform, dst, src1, src2, false, false);
4014 }
4015 
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4016 LogicVRegister Simulator::usdot(VectorFormat vform,
4017                                 LogicVRegister dst,
4018                                 const LogicVRegister& src1,
4019                                 const LogicVRegister& src2) {
4020   return dot(vform, dst, src1, src2, false, true);
4021 }
4022 
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4023 LogicVRegister Simulator::cdot(VectorFormat vform,
4024                                LogicVRegister dst,
4025                                const LogicVRegister& acc,
4026                                const LogicVRegister& src1,
4027                                const LogicVRegister& src2,
4028                                int rot) {
4029   VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4030   VectorFormat quarter_vform =
4031       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4032 
4033   int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4034   int sel_b = 1 - sel_a;
4035   int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4036 
4037   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4038     int64_t result = acc.Int(vform, i);
4039     for (int j = 0; j < 2; j++) {
4040       int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4041       int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4042       int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4043       int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4044       result += (r1 * r2) + (sub_i * i1 * i2);
4045     }
4046     dst.SetInt(vform, i, result);
4047   }
4048   return dst;
4049 }
4050 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4051 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4052                                     LogicVRegister dst,
4053                                     const LogicVRegister& srca,
4054                                     const LogicVRegister& src1,
4055                                     const LogicVRegister& src2,
4056                                     int rot) {
4057   SimVRegister src1_a, src1_b;
4058   SimVRegister src2_a, src2_b;
4059   SimVRegister srca_i, srca_r;
4060   SimVRegister zero, temp;
4061   zero.Clear();
4062 
4063   if ((rot == 0) || (rot == 180)) {
4064     uzp1(vform, src1_a, src1, zero);
4065     uzp1(vform, src2_a, src2, zero);
4066     uzp2(vform, src2_b, src2, zero);
4067   } else {
4068     uzp2(vform, src1_a, src1, zero);
4069     uzp2(vform, src2_a, src2, zero);
4070     uzp1(vform, src2_b, src2, zero);
4071   }
4072 
4073   uzp1(vform, srca_r, srca, zero);
4074   uzp2(vform, srca_i, srca, zero);
4075 
4076   bool sub_r = (rot == 90) || (rot == 180);
4077   bool sub_i = (rot == 180) || (rot == 270);
4078 
4079   const bool round = true;
4080   sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4081   sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4082   zip1(vform, dst, srca_r, srca_i);
4083   return dst;
4084 }
4085 
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4086 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4087                                     LogicVRegister dst,
4088                                     const LogicVRegister& srca,
4089                                     const LogicVRegister& src1,
4090                                     const LogicVRegister& src2,
4091                                     int index,
4092                                     int rot) {
4093   SimVRegister temp;
4094   dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4095   return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4096 }
4097 
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4098 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4099                                       LogicVRegister dst,
4100                                       const LogicVRegister& src1,
4101                                       const LogicVRegister& src2,
4102                                       bool round,
4103                                       bool sub_op) {
4104   // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4105   // To avoid this, we use:
4106   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4107   // which is same as:
4108   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4109 
4110   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111   int esize = kDRegSize;
4112   vixl_uint128_t round_const, accum;
4113   round_const.first = 0;
4114   if (round) {
4115     round_const.second = UINT64_C(1) << (esize - 2);
4116   } else {
4117     round_const.second = 0;
4118   }
4119 
4120   dst.ClearForWrite(vform);
4121   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122     // Shift the whole value left by `esize - 1` bits.
4123     accum.first = dst.Int(vform, i) >> 1;
4124     accum.second = dst.Int(vform, i) << (esize - 1);
4125 
4126     vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4127 
4128     if (sub_op) {
4129       product = Neg128(product);
4130     }
4131     accum = Add128(accum, product);
4132 
4133     // Perform rounding.
4134     accum = Add128(accum, round_const);
4135 
4136     // Arithmetic shift the whole value right by `esize - 1` bits.
4137     accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4138     accum.first = -(accum.first >> (esize - 1));
4139 
4140     // Perform saturation.
4141     bool is_pos = (accum.first == 0) ? true : false;
4142     if (is_pos &&
4143         (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4144       accum.second = MaxIntFromFormat(vform);
4145     } else if (!is_pos && (accum.second <
4146                            static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4147       accum.second = MinIntFromFormat(vform);
4148     }
4149 
4150     dst.SetInt(vform, i, accum.second);
4151   }
4152 
4153   return dst;
4154 }
4155 
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4156 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4157                                     LogicVRegister dst,
4158                                     const LogicVRegister& src1,
4159                                     const LogicVRegister& src2,
4160                                     bool round,
4161                                     bool sub_op) {
4162   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4163   // To avoid this, we use:
4164   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4165   // which is same as:
4166   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4167 
4168   if (vform == kFormatVnD) {
4169     return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4170   }
4171 
4172   int esize = LaneSizeInBitsFromFormat(vform);
4173   int round_const = round ? (1 << (esize - 2)) : 0;
4174   int64_t accum;
4175 
4176   dst.ClearForWrite(vform);
4177   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4178     accum = dst.Int(vform, i) << (esize - 1);
4179     if (sub_op) {
4180       accum -= src1.Int(vform, i) * src2.Int(vform, i);
4181     } else {
4182       accum += src1.Int(vform, i) * src2.Int(vform, i);
4183     }
4184     accum += round_const;
4185     accum = accum >> (esize - 1);
4186 
4187     if (accum > MaxIntFromFormat(vform)) {
4188       accum = MaxIntFromFormat(vform);
4189     } else if (accum < MinIntFromFormat(vform)) {
4190       accum = MinIntFromFormat(vform);
4191     }
4192     dst.SetInt(vform, i, accum);
4193   }
4194   return dst;
4195 }
4196 
4197 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4198 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4199                                    LogicVRegister dst,
4200                                    const LogicVRegister& src1,
4201                                    const LogicVRegister& src2,
4202                                    bool round) {
4203   return sqrdmlash(vform, dst, src1, src2, round, false);
4204 }
4205 
4206 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4207 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4208                                    LogicVRegister dst,
4209                                    const LogicVRegister& src1,
4210                                    const LogicVRegister& src2,
4211                                    bool round) {
4212   return sqrdmlash(vform, dst, src1, src2, round, true);
4213 }
4214 
4215 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4216 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4217                                   LogicVRegister dst,
4218                                   const LogicVRegister& src1,
4219                                   const LogicVRegister& src2) {
4220   return sqrdmulh(vform, dst, src1, src2, false);
4221 }
4222 
4223 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4224 LogicVRegister Simulator::addhn(VectorFormat vform,
4225                                 LogicVRegister dst,
4226                                 const LogicVRegister& src1,
4227                                 const LogicVRegister& src2) {
4228   SimVRegister temp;
4229   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4230   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4231   return dst;
4232 }
4233 
4234 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4235 LogicVRegister Simulator::addhn2(VectorFormat vform,
4236                                  LogicVRegister dst,
4237                                  const LogicVRegister& src1,
4238                                  const LogicVRegister& src2) {
4239   SimVRegister temp;
4240   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4241   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4242   return dst;
4243 }
4244 
4245 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4246 LogicVRegister Simulator::raddhn(VectorFormat vform,
4247                                  LogicVRegister dst,
4248                                  const LogicVRegister& src1,
4249                                  const LogicVRegister& src2) {
4250   SimVRegister temp;
4251   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4252   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4253   return dst;
4254 }
4255 
4256 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4257 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4258                                   LogicVRegister dst,
4259                                   const LogicVRegister& src1,
4260                                   const LogicVRegister& src2) {
4261   SimVRegister temp;
4262   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4263   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4264   return dst;
4265 }
4266 
4267 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::subhn(VectorFormat vform,
4269                                 LogicVRegister dst,
4270                                 const LogicVRegister& src1,
4271                                 const LogicVRegister& src2) {
4272   SimVRegister temp;
4273   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4274   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4275   return dst;
4276 }
4277 
4278 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4279 LogicVRegister Simulator::subhn2(VectorFormat vform,
4280                                  LogicVRegister dst,
4281                                  const LogicVRegister& src1,
4282                                  const LogicVRegister& src2) {
4283   SimVRegister temp;
4284   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4285   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4286   return dst;
4287 }
4288 
4289 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4290 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4291                                  LogicVRegister dst,
4292                                  const LogicVRegister& src1,
4293                                  const LogicVRegister& src2) {
4294   SimVRegister temp;
4295   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4296   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4297   return dst;
4298 }
4299 
4300 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4301 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4302                                   LogicVRegister dst,
4303                                   const LogicVRegister& src1,
4304                                   const LogicVRegister& src2) {
4305   SimVRegister temp;
4306   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4307   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4308   return dst;
4309 }
4310 
4311 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4312 LogicVRegister Simulator::trn1(VectorFormat vform,
4313                                LogicVRegister dst,
4314                                const LogicVRegister& src1,
4315                                const LogicVRegister& src2) {
4316   uint64_t result[kZRegMaxSizeInBytes] = {};
4317   int lane_count = LaneCountFromFormat(vform);
4318   int pairs = lane_count / 2;
4319   for (int i = 0; i < pairs; ++i) {
4320     result[2 * i] = src1.Uint(vform, 2 * i);
4321     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4322   }
4323 
4324   dst.ClearForWrite(vform);
4325   for (int i = 0; i < lane_count; ++i) {
4326     dst.SetUint(vform, i, result[i]);
4327   }
4328   return dst;
4329 }
4330 
4331 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4332 LogicVRegister Simulator::trn2(VectorFormat vform,
4333                                LogicVRegister dst,
4334                                const LogicVRegister& src1,
4335                                const LogicVRegister& src2) {
4336   uint64_t result[kZRegMaxSizeInBytes] = {};
4337   int lane_count = LaneCountFromFormat(vform);
4338   int pairs = lane_count / 2;
4339   for (int i = 0; i < pairs; ++i) {
4340     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4341     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4342   }
4343 
4344   dst.ClearForWrite(vform);
4345   for (int i = 0; i < lane_count; ++i) {
4346     dst.SetUint(vform, i, result[i]);
4347   }
4348   return dst;
4349 }
4350 
4351 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4352 LogicVRegister Simulator::zip1(VectorFormat vform,
4353                                LogicVRegister dst,
4354                                const LogicVRegister& src1,
4355                                const LogicVRegister& src2) {
4356   uint64_t result[kZRegMaxSizeInBytes] = {};
4357   int lane_count = LaneCountFromFormat(vform);
4358   int pairs = lane_count / 2;
4359   for (int i = 0; i < pairs; ++i) {
4360     result[2 * i] = src1.Uint(vform, i);
4361     result[(2 * i) + 1] = src2.Uint(vform, i);
4362   }
4363 
4364   dst.ClearForWrite(vform);
4365   for (int i = 0; i < lane_count; ++i) {
4366     dst.SetUint(vform, i, result[i]);
4367   }
4368   return dst;
4369 }
4370 
4371 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4372 LogicVRegister Simulator::zip2(VectorFormat vform,
4373                                LogicVRegister dst,
4374                                const LogicVRegister& src1,
4375                                const LogicVRegister& src2) {
4376   uint64_t result[kZRegMaxSizeInBytes] = {};
4377   int lane_count = LaneCountFromFormat(vform);
4378   int pairs = lane_count / 2;
4379   for (int i = 0; i < pairs; ++i) {
4380     result[2 * i] = src1.Uint(vform, pairs + i);
4381     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4382   }
4383 
4384   dst.ClearForWrite(vform);
4385   for (int i = 0; i < lane_count; ++i) {
4386     dst.SetUint(vform, i, result[i]);
4387   }
4388   return dst;
4389 }
4390 
4391 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4392 LogicVRegister Simulator::uzp1(VectorFormat vform,
4393                                LogicVRegister dst,
4394                                const LogicVRegister& src1,
4395                                const LogicVRegister& src2) {
4396   uint64_t result[kZRegMaxSizeInBytes * 2];
4397   int lane_count = LaneCountFromFormat(vform);
4398   for (int i = 0; i < lane_count; ++i) {
4399     result[i] = src1.Uint(vform, i);
4400     result[lane_count + i] = src2.Uint(vform, i);
4401   }
4402 
4403   dst.ClearForWrite(vform);
4404   for (int i = 0; i < lane_count; ++i) {
4405     dst.SetUint(vform, i, result[2 * i]);
4406   }
4407   return dst;
4408 }
4409 
4410 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4411 LogicVRegister Simulator::uzp2(VectorFormat vform,
4412                                LogicVRegister dst,
4413                                const LogicVRegister& src1,
4414                                const LogicVRegister& src2) {
4415   uint64_t result[kZRegMaxSizeInBytes * 2];
4416   int lane_count = LaneCountFromFormat(vform);
4417   for (int i = 0; i < lane_count; ++i) {
4418     result[i] = src1.Uint(vform, i);
4419     result[lane_count + i] = src2.Uint(vform, i);
4420   }
4421 
4422   dst.ClearForWrite(vform);
4423   for (int i = 0; i < lane_count; ++i) {
4424     dst.SetUint(vform, i, result[(2 * i) + 1]);
4425   }
4426   return dst;
4427 }
4428 
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4430                                                 LogicVRegister dst,
4431                                                 const LogicVRegister& src) {
4432   // Interleave the top and bottom half of a vector, ie. for a vector:
4433   //
4434   //   [ ... | F | D | B | ... | E | C | A ]
4435   //
4436   // where B is the first element in the top half of the vector, produce a
4437   // result vector:
4438   //
4439   //   [ ... | ... | F | E | D | C | B | A ]
4440 
4441   uint64_t result[kZRegMaxSizeInBytes] = {};
4442   int lane_count = LaneCountFromFormat(vform);
4443   for (int i = 0; i < lane_count; i += 2) {
4444     result[i] = src.Uint(vform, i / 2);
4445     result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4446   }
4447   dst.SetUintArray(vform, result);
4448   return dst;
4449 }
4450 
4451 template <typename T>
FPNeg(T op)4452 T Simulator::FPNeg(T op) {
4453   return -op;
4454 }
4455 
4456 template <typename T>
FPAdd(T op1,T op2)4457 T Simulator::FPAdd(T op1, T op2) {
4458   T result = FPProcessNaNs(op1, op2);
4459   if (IsNaN(result)) {
4460     return result;
4461   }
4462 
4463   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4464     // inf + -inf returns the default NaN.
4465     FPProcessException();
4466     return FPDefaultNaN<T>();
4467   } else {
4468     // Other cases should be handled by standard arithmetic.
4469     return op1 + op2;
4470   }
4471 }
4472 
4473 
4474 template <typename T>
FPSub(T op1,T op2)4475 T Simulator::FPSub(T op1, T op2) {
4476   // NaNs should be handled elsewhere.
4477   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4478 
4479   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4480     // inf - inf returns the default NaN.
4481     FPProcessException();
4482     return FPDefaultNaN<T>();
4483   } else {
4484     // Other cases should be handled by standard arithmetic.
4485     return op1 - op2;
4486   }
4487 }
4488 
4489 template <typename T>
FPMulNaNs(T op1,T op2)4490 T Simulator::FPMulNaNs(T op1, T op2) {
4491   T result = FPProcessNaNs(op1, op2);
4492   return IsNaN(result) ? result : FPMul(op1, op2);
4493 }
4494 
4495 template <typename T>
FPMul(T op1,T op2)4496 T Simulator::FPMul(T op1, T op2) {
4497   // NaNs should be handled elsewhere.
4498   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4499 
4500   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4501     // inf * 0.0 returns the default NaN.
4502     FPProcessException();
4503     return FPDefaultNaN<T>();
4504   } else {
4505     // Other cases should be handled by standard arithmetic.
4506     return op1 * op2;
4507   }
4508 }
4509 
4510 
4511 template <typename T>
FPMulx(T op1,T op2)4512 T Simulator::FPMulx(T op1, T op2) {
4513   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4514     // inf * 0.0 returns +/-2.0.
4515     T two = 2.0;
4516     return copysign(1.0, op1) * copysign(1.0, op2) * two;
4517   }
4518   return FPMul(op1, op2);
4519 }
4520 
4521 
4522 template <typename T>
FPMulAdd(T a,T op1,T op2)4523 T Simulator::FPMulAdd(T a, T op1, T op2) {
4524   T result = FPProcessNaNs3(a, op1, op2);
4525 
4526   T sign_a = copysign(1.0, a);
4527   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4528   bool isinf_prod = IsInf(op1) || IsInf(op2);
4529   bool operation_generates_nan =
4530       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4531       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4532       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4533 
4534   if (IsNaN(result)) {
4535     // Generated NaNs override quiet NaNs propagated from a.
4536     if (operation_generates_nan && IsQuietNaN(a)) {
4537       FPProcessException();
4538       return FPDefaultNaN<T>();
4539     } else {
4540       return result;
4541     }
4542   }
4543 
4544   // If the operation would produce a NaN, return the default NaN.
4545   if (operation_generates_nan) {
4546     FPProcessException();
4547     return FPDefaultNaN<T>();
4548   }
4549 
4550   // Work around broken fma implementations for exact zero results: The sign of
4551   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4552   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4553     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4554   }
4555 
4556   result = FusedMultiplyAdd(op1, op2, a);
4557   VIXL_ASSERT(!IsNaN(result));
4558 
4559   // Work around broken fma implementations for rounded zero results: If a is
4560   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4561   if ((a == 0.0) && (result == 0.0)) {
4562     return copysign(0.0, sign_prod);
4563   }
4564 
4565   return result;
4566 }
4567 
4568 
4569 template <typename T>
FPDiv(T op1,T op2)4570 T Simulator::FPDiv(T op1, T op2) {
4571   // NaNs should be handled elsewhere.
4572   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4573 
4574   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4575     // inf / inf and 0.0 / 0.0 return the default NaN.
4576     FPProcessException();
4577     return FPDefaultNaN<T>();
4578   } else {
4579     if (op2 == 0.0) {
4580       FPProcessException();
4581       if (!IsNaN(op1)) {
4582         double op1_sign = copysign(1.0, op1);
4583         double op2_sign = copysign(1.0, op2);
4584         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4585       }
4586     }
4587 
4588     // Other cases should be handled by standard arithmetic.
4589     return op1 / op2;
4590   }
4591 }
4592 
4593 
4594 template <typename T>
FPSqrt(T op)4595 T Simulator::FPSqrt(T op) {
4596   if (IsNaN(op)) {
4597     return FPProcessNaN(op);
4598   } else if (op < T(0.0)) {
4599     FPProcessException();
4600     return FPDefaultNaN<T>();
4601   } else {
4602     return sqrt(op);
4603   }
4604 }
4605 
4606 
4607 template <typename T>
FPMax(T a,T b)4608 T Simulator::FPMax(T a, T b) {
4609   T result = FPProcessNaNs(a, b);
4610   if (IsNaN(result)) return result;
4611 
4612   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4613     // a and b are zero, and the sign differs: return +0.0.
4614     return 0.0;
4615   } else {
4616     return (a > b) ? a : b;
4617   }
4618 }
4619 
4620 
4621 template <typename T>
FPMaxNM(T a,T b)4622 T Simulator::FPMaxNM(T a, T b) {
4623   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4624     a = kFP64NegativeInfinity;
4625   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4626     b = kFP64NegativeInfinity;
4627   }
4628 
4629   T result = FPProcessNaNs(a, b);
4630   return IsNaN(result) ? result : FPMax(a, b);
4631 }
4632 
4633 
4634 template <typename T>
FPMin(T a,T b)4635 T Simulator::FPMin(T a, T b) {
4636   T result = FPProcessNaNs(a, b);
4637   if (IsNaN(result)) return result;
4638 
4639   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4640     // a and b are zero, and the sign differs: return -0.0.
4641     return -0.0;
4642   } else {
4643     return (a < b) ? a : b;
4644   }
4645 }
4646 
4647 
4648 template <typename T>
FPMinNM(T a,T b)4649 T Simulator::FPMinNM(T a, T b) {
4650   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4651     a = kFP64PositiveInfinity;
4652   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4653     b = kFP64PositiveInfinity;
4654   }
4655 
4656   T result = FPProcessNaNs(a, b);
4657   return IsNaN(result) ? result : FPMin(a, b);
4658 }
4659 
4660 
4661 template <typename T>
FPRecipStepFused(T op1,T op2)4662 T Simulator::FPRecipStepFused(T op1, T op2) {
4663   const T two = 2.0;
4664   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4665     return two;
4666   } else if (IsInf(op1) || IsInf(op2)) {
4667     // Return +inf if signs match, otherwise -inf.
4668     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4669                                           : kFP64NegativeInfinity;
4670   } else {
4671     return FusedMultiplyAdd(op1, op2, two);
4672   }
4673 }
4674 
// Returns true if `value` is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4679 
4680 template <>
IsNormal(SimFloat16 value)4681 bool IsNormal(SimFloat16 value) {
4682   uint16_t rawbits = Float16ToRawbits(value);
4683   uint16_t exp_mask = 0x7c00;
4684   // Check that the exponent is neither all zeroes or all ones.
4685   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4686 }
4687 
4688 
4689 template <typename T>
FPRSqrtStepFused(T op1,T op2)4690 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4691   const T one_point_five = 1.5;
4692   const T two = 2.0;
4693 
4694   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4695     return one_point_five;
4696   } else if (IsInf(op1) || IsInf(op2)) {
4697     // Return +inf if signs match, otherwise -inf.
4698     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4699                                           : kFP64NegativeInfinity;
4700   } else {
4701     // The multiply-add-halve operation must be fully fused, so avoid interim
4702     // rounding by checking which operand can be losslessly divided by two
4703     // before doing the multiply-add.
4704     if (IsNormal(op1 / two)) {
4705       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4706     } else if (IsNormal(op2 / two)) {
4707       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4708     } else {
4709       // Neither operand is normal after halving: the result is dominated by
4710       // the addition term, so just return that.
4711       return one_point_five;
4712     }
4713   }
4714 }
4715 
FPToFixedJS(double value)4716 int32_t Simulator::FPToFixedJS(double value) {
4717   // The Z-flag is set when the conversion from double precision floating-point
4718   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4719   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4720   // Z-flag is unset.
4721   int Z = 1;
4722   int32_t result;
4723 
4724   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4725       (value == kFP64NegativeInfinity)) {
4726     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4727     // unset the Z-flag.
4728     result = 0.0;
4729     if ((value != 0.0) || std::signbit(value)) {
4730       Z = 0;
4731     }
4732   } else if (std::isnan(value)) {
4733     // NaN values unset the Z-flag and set the result to 0.
4734     FPProcessNaN(value);
4735     result = 0;
4736     Z = 0;
4737   } else {
4738     // All other values are converted to an integer representation, rounded
4739     // toward zero.
4740     double int_result = std::floor(value);
4741     double error = value - int_result;
4742 
4743     if ((error != 0.0) && (int_result < 0.0)) {
4744       int_result++;
4745     }
4746 
4747     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4748     // write a one-liner with std::round, but the behaviour on ties is incorrect
4749     // for our purposes.
4750     double mod_const = static_cast<double>(UINT64_C(1) << 32);
4751     double mod_error =
4752         (int_result / mod_const) - std::floor(int_result / mod_const);
4753     double constrained;
4754     if (mod_error == 0.5) {
4755       constrained = INT32_MIN;
4756     } else {
4757       constrained = int_result - mod_const * round(int_result / mod_const);
4758     }
4759 
4760     VIXL_ASSERT(std::floor(constrained) == constrained);
4761     VIXL_ASSERT(constrained >= INT32_MIN);
4762     VIXL_ASSERT(constrained <= INT32_MAX);
4763 
4764     // Take the bottom 32 bits of the result as a 32-bit integer.
4765     result = static_cast<int32_t>(constrained);
4766 
4767     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4768         (error != 0.0)) {
4769       // If the integer result is out of range or the conversion isn't exact,
4770       // take exception and unset the Z-flag.
4771       FPProcessException();
4772       Z = 0;
4773     }
4774   }
4775 
4776   ReadNzcv().SetN(0);
4777   ReadNzcv().SetZ(Z);
4778   ReadNzcv().SetC(0);
4779   ReadNzcv().SetV(0);
4780 
4781   return result;
4782 }
4783 
FPRoundIntCommon(double value,FPRounding round_mode)4784 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4785   VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4786               (value != kFP64NegativeInfinity));
4787   VIXL_ASSERT(!IsNaN(value));
4788 
4789   double int_result = std::floor(value);
4790   double error = value - int_result;
4791   switch (round_mode) {
4792     case FPTieAway: {
4793       // Take care of correctly handling the range ]-0.5, -0.0], which must
4794       // yield -0.0.
4795       if ((-0.5 < value) && (value < 0.0)) {
4796         int_result = -0.0;
4797 
4798       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4799         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4800         // result is positive, round up.
4801         int_result++;
4802       }
4803       break;
4804     }
4805     case FPTieEven: {
4806       // Take care of correctly handling the range [-0.5, -0.0], which must
4807       // yield -0.0.
4808       if ((-0.5 <= value) && (value < 0.0)) {
4809         int_result = -0.0;
4810 
4811         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4812         // result is odd, round up.
4813       } else if ((error > 0.5) ||
4814                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4815         int_result++;
4816       }
4817       break;
4818     }
4819     case FPZero: {
4820       // If value>0 then we take floor(value)
4821       // otherwise, ceil(value).
4822       if (value < 0) {
4823         int_result = ceil(value);
4824       }
4825       break;
4826     }
4827     case FPNegativeInfinity: {
4828       // We always use floor(value).
4829       break;
4830     }
4831     case FPPositiveInfinity: {
4832       // Take care of correctly handling the range ]-1.0, -0.0], which must
4833       // yield -0.0.
4834       if ((-1.0 < value) && (value < 0.0)) {
4835         int_result = -0.0;
4836 
4837         // If the error is non-zero, round up.
4838       } else if (error > 0.0) {
4839         int_result++;
4840       }
4841       break;
4842     }
4843     default:
4844       VIXL_UNIMPLEMENTED();
4845   }
4846   return int_result;
4847 }
4848 
FPRoundInt(double value,FPRounding round_mode)4849 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4850   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4851       (value == kFP64NegativeInfinity)) {
4852     return value;
4853   } else if (IsNaN(value)) {
4854     return FPProcessNaN(value);
4855   }
4856   return FPRoundIntCommon(value, round_mode);
4857 }
4858 
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4859 double Simulator::FPRoundInt(double value,
4860                              FPRounding round_mode,
4861                              FrintMode frint_mode) {
4862   if (frint_mode == kFrintToInteger) {
4863     return FPRoundInt(value, round_mode);
4864   }
4865 
4866   VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4867 
4868   if (value == 0.0) {
4869     return value;
4870   }
4871 
4872   if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4873       IsNaN(value)) {
4874     if (frint_mode == kFrintToInt32) {
4875       return INT32_MIN;
4876     } else {
4877       return INT64_MIN;
4878     }
4879   }
4880 
4881   double result = FPRoundIntCommon(value, round_mode);
4882 
4883   // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4884   // representable as a double, and is rounded to (INT64_MAX + 1) when
4885   // converted. To avoid this, we compare `result >= int64_max_plus_one`
4886   // instead; this is safe because `result` is known to be integral, and
4887   // `int64_max_plus_one` is exactly representable as a double.
4888   constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4889   VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4890                          int64_max_plus_one)) == int64_max_plus_one);
4891 
4892   if (frint_mode == kFrintToInt32) {
4893     if ((result > INT32_MAX) || (result < INT32_MIN)) {
4894       return INT32_MIN;
4895     }
4896   } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4897     return INT64_MIN;
4898   }
4899 
4900   return result;
4901 }
4902 
FPToInt16(double value,FPRounding rmode)4903 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4904   value = FPRoundInt(value, rmode);
4905   if (value >= kHMaxInt) {
4906     return kHMaxInt;
4907   } else if (value < kHMinInt) {
4908     return kHMinInt;
4909   }
4910   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4911 }
4912 
4913 
FPToInt32(double value,FPRounding rmode)4914 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4915   value = FPRoundInt(value, rmode);
4916   if (value >= kWMaxInt) {
4917     return kWMaxInt;
4918   } else if (value < kWMinInt) {
4919     return kWMinInt;
4920   }
4921   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4922 }
4923 
4924 
FPToInt64(double value,FPRounding rmode)4925 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4926   value = FPRoundInt(value, rmode);
4927   // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4928   // as a result of kMaxInt not being representable as a double.
4929   if (value >= 9223372036854775808.) {
4930     return kXMaxInt;
4931   } else if (value < kXMinInt) {
4932     return kXMinInt;
4933   }
4934   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4935 }
4936 
4937 
FPToUInt16(double value,FPRounding rmode)4938 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4939   value = FPRoundInt(value, rmode);
4940   if (value >= kHMaxUInt) {
4941     return kHMaxUInt;
4942   } else if (value < 0.0) {
4943     return 0;
4944   }
4945   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4946 }
4947 
4948 
FPToUInt32(double value,FPRounding rmode)4949 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4950   value = FPRoundInt(value, rmode);
4951   if (value >= kWMaxUInt) {
4952     return kWMaxUInt;
4953   } else if (value < 0.0) {
4954     return 0;
4955   }
4956   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4957 }
4958 
4959 
FPToUInt64(double value,FPRounding rmode)4960 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4961   value = FPRoundInt(value, rmode);
4962   // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4963   // as a result of kMaxUInt not being representable as a double.
4964   if (value >= 18446744073709551616.) {
4965     return kXMaxUInt;
4966   } else if (value < 0.0) {
4967     return 0;
4968   }
4969   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4970 }
4971 
4972 
// Defines, for each entry of NEON_FP3SAME_LIST, a pair of Simulator::FN
// overloads:
//   - a template over the lane type T that applies OP to each pair of lanes of
//     src1 and src2. When PROCNAN is true the operands first go through
//     FPProcessNaNs, and OP is only applied if that did not produce a NaN;
//   - a non-template overload that selects SimFloat16, float or double from
//     the lane size of vform and forwards to the template.
// No comments appear inside the macro body because of the line continuations.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    const int lane_count = LaneCountFromFormat(vform);           \
    dst.ClearForWrite(vform);                                    \
    for (int lane = 0; lane < lane_count; ++lane) {              \
      T lhs = src1.Float<T>(lane);                               \
      T rhs = src2.Float<T>(lane);                               \
      bool apply_op = true;                                      \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(lhs, rhs);                        \
        apply_op = !IsNaN(result);                               \
      }                                                          \
      if (apply_op) {                                            \
        result = OP(lhs, rhs);                                   \
      }                                                          \
      dst.SetFloat(vform, lane, result);                         \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    const auto lane_bits = LaneSizeInBitsFromFormat(vform);      \
    if (lane_bits == kHRegSize) {                                \
      FN<SimFloat16>(vform, dst, src1, src2);                    \
    } else if (lane_bits == kSRegSize) {                         \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(lane_bits == kDRegSize);                       \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5013 
5014 
5015 LogicVRegister Simulator::fnmul(VectorFormat vform,
5016                                 LogicVRegister dst,
5017                                 const LogicVRegister& src1,
5018                                 const LogicVRegister& src2) {
5019   SimVRegister temp;
5020   LogicVRegister product = fmul(vform, temp, src1, src2);
5021   return fneg(vform, dst, product);
5022 }
5023 
5024 
5025 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5026 LogicVRegister Simulator::frecps(VectorFormat vform,
5027                                  LogicVRegister dst,
5028                                  const LogicVRegister& src1,
5029                                  const LogicVRegister& src2) {
5030   dst.ClearForWrite(vform);
5031   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5032     T op1 = -src1.Float<T>(i);
5033     T op2 = src2.Float<T>(i);
5034     T result = FPProcessNaNs(op1, op2);
5035     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5036   }
5037   return dst;
5038 }
5039 
5040 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5041 LogicVRegister Simulator::frecps(VectorFormat vform,
5042                                  LogicVRegister dst,
5043                                  const LogicVRegister& src1,
5044                                  const LogicVRegister& src2) {
5045   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5046     frecps<SimFloat16>(vform, dst, src1, src2);
5047   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5048     frecps<float>(vform, dst, src1, src2);
5049   } else {
5050     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5051     frecps<double>(vform, dst, src1, src2);
5052   }
5053   return dst;
5054 }
5055 
5056 
5057 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5058 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5059                                   LogicVRegister dst,
5060                                   const LogicVRegister& src1,
5061                                   const LogicVRegister& src2) {
5062   dst.ClearForWrite(vform);
5063   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5064     T op1 = -src1.Float<T>(i);
5065     T op2 = src2.Float<T>(i);
5066     T result = FPProcessNaNs(op1, op2);
5067     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5068   }
5069   return dst;
5070 }
5071 
5072 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5073 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5074                                   LogicVRegister dst,
5075                                   const LogicVRegister& src1,
5076                                   const LogicVRegister& src2) {
5077   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5078     frsqrts<SimFloat16>(vform, dst, src1, src2);
5079   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5080     frsqrts<float>(vform, dst, src1, src2);
5081   } else {
5082     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5083     frsqrts<double>(vform, dst, src1, src2);
5084   }
5085   return dst;
5086 }
5087 
5088 
5089 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5090 LogicVRegister Simulator::fcmp(VectorFormat vform,
5091                                LogicVRegister dst,
5092                                const LogicVRegister& src1,
5093                                const LogicVRegister& src2,
5094                                Condition cond) {
5095   dst.ClearForWrite(vform);
5096   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5097     bool result = false;
5098     T op1 = src1.Float<T>(i);
5099     T op2 = src2.Float<T>(i);
5100     bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5101 
5102     switch (cond) {
5103       case eq:
5104         result = (op1 == op2);
5105         break;
5106       case ge:
5107         result = (op1 >= op2);
5108         break;
5109       case gt:
5110         result = (op1 > op2);
5111         break;
5112       case le:
5113         result = (op1 <= op2);
5114         break;
5115       case lt:
5116         result = (op1 < op2);
5117         break;
5118       case ne:
5119         result = (op1 != op2);
5120         break;
5121       case uo:
5122         result = unordered;
5123         break;
5124       default:
5125         // Other conditions are defined in terms of those above.
5126         VIXL_UNREACHABLE();
5127         break;
5128     }
5129 
5130     if (result && unordered) {
5131       // Only `uo` and `ne` can be true for unordered comparisons.
5132       VIXL_ASSERT((cond == uo) || (cond == ne));
5133     }
5134 
5135     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5136   }
5137   return dst;
5138 }
5139 
5140 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5141 LogicVRegister Simulator::fcmp(VectorFormat vform,
5142                                LogicVRegister dst,
5143                                const LogicVRegister& src1,
5144                                const LogicVRegister& src2,
5145                                Condition cond) {
5146   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5147     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5148   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5149     fcmp<float>(vform, dst, src1, src2, cond);
5150   } else {
5151     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5152     fcmp<double>(vform, dst, src1, src2, cond);
5153   }
5154   return dst;
5155 }
5156 
5157 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5158 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5159                                     LogicVRegister dst,
5160                                     const LogicVRegister& src,
5161                                     Condition cond) {
5162   SimVRegister temp;
5163   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5164     LogicVRegister zero_reg =
5165         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5166     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5167   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5168     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5169     fcmp<float>(vform, dst, src, zero_reg, cond);
5170   } else {
5171     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5172     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5173     fcmp<double>(vform, dst, src, zero_reg, cond);
5174   }
5175   return dst;
5176 }
5177 
5178 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5179 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5180                                   LogicVRegister dst,
5181                                   const LogicVRegister& src1,
5182                                   const LogicVRegister& src2,
5183                                   Condition cond) {
5184   SimVRegister temp1, temp2;
5185   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5186     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5187     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5188     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5189   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5190     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5191     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5192     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5193   } else {
5194     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5195     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5196     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5197     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5198   }
5199   return dst;
5200 }
5201 
5202 
5203 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5204 LogicVRegister Simulator::fmla(VectorFormat vform,
5205                                LogicVRegister dst,
5206                                const LogicVRegister& srca,
5207                                const LogicVRegister& src1,
5208                                const LogicVRegister& src2) {
5209   dst.ClearForWrite(vform);
5210   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5211     T op1 = src1.Float<T>(i);
5212     T op2 = src2.Float<T>(i);
5213     T acc = srca.Float<T>(i);
5214     T result = FPMulAdd(acc, op1, op2);
5215     dst.SetFloat(vform, i, result);
5216   }
5217   return dst;
5218 }
5219 
5220 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5221 LogicVRegister Simulator::fmla(VectorFormat vform,
5222                                LogicVRegister dst,
5223                                const LogicVRegister& srca,
5224                                const LogicVRegister& src1,
5225                                const LogicVRegister& src2) {
5226   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5227     fmla<SimFloat16>(vform, dst, srca, src1, src2);
5228   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5229     fmla<float>(vform, dst, srca, src1, src2);
5230   } else {
5231     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5232     fmla<double>(vform, dst, srca, src1, src2);
5233   }
5234   return dst;
5235 }
5236 
5237 
5238 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5239 LogicVRegister Simulator::fmls(VectorFormat vform,
5240                                LogicVRegister dst,
5241                                const LogicVRegister& srca,
5242                                const LogicVRegister& src1,
5243                                const LogicVRegister& src2) {
5244   dst.ClearForWrite(vform);
5245   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5246     T op1 = -src1.Float<T>(i);
5247     T op2 = src2.Float<T>(i);
5248     T acc = srca.Float<T>(i);
5249     T result = FPMulAdd(acc, op1, op2);
5250     dst.SetFloat(i, result);
5251   }
5252   return dst;
5253 }
5254 
5255 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5256 LogicVRegister Simulator::fmls(VectorFormat vform,
5257                                LogicVRegister dst,
5258                                const LogicVRegister& srca,
5259                                const LogicVRegister& src1,
5260                                const LogicVRegister& src2) {
5261   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5262     fmls<SimFloat16>(vform, dst, srca, src1, src2);
5263   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5264     fmls<float>(vform, dst, srca, src1, src2);
5265   } else {
5266     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5267     fmls<double>(vform, dst, srca, src1, src2);
5268   }
5269   return dst;
5270 }
5271 
5272 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5273 LogicVRegister Simulator::fmlal(VectorFormat vform,
5274                                 LogicVRegister dst,
5275                                 const LogicVRegister& src1,
5276                                 const LogicVRegister& src2) {
5277   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5278   dst.ClearForWrite(vform);
5279   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5280     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5281     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5282     float acc = dst.Float<float>(i);
5283     float result = FPMulAdd(acc, op1, op2);
5284     dst.SetFloat(i, result);
5285   }
5286   return dst;
5287 }
5288 
5289 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5290 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5291                                  LogicVRegister dst,
5292                                  const LogicVRegister& src1,
5293                                  const LogicVRegister& src2) {
5294   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5295   dst.ClearForWrite(vform);
5296   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5297     int src = i + LaneCountFromFormat(vform);
5298     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5299     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5300     float acc = dst.Float<float>(i);
5301     float result = FPMulAdd(acc, op1, op2);
5302     dst.SetFloat(i, result);
5303   }
5304   return dst;
5305 }
5306 
5307 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5308 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5309                                 LogicVRegister dst,
5310                                 const LogicVRegister& src1,
5311                                 const LogicVRegister& src2) {
5312   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5313   dst.ClearForWrite(vform);
5314   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5315     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5316     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5317     float acc = dst.Float<float>(i);
5318     float result = FPMulAdd(acc, op1, op2);
5319     dst.SetFloat(i, result);
5320   }
5321   return dst;
5322 }
5323 
5324 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5325 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5326                                  LogicVRegister dst,
5327                                  const LogicVRegister& src1,
5328                                  const LogicVRegister& src2) {
5329   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5330   dst.ClearForWrite(vform);
5331   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5332     int src = i + LaneCountFromFormat(vform);
5333     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5334     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5335     float acc = dst.Float<float>(i);
5336     float result = FPMulAdd(acc, op1, op2);
5337     dst.SetFloat(i, result);
5338   }
5339   return dst;
5340 }
5341 
5342 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5343 LogicVRegister Simulator::fmlal(VectorFormat vform,
5344                                 LogicVRegister dst,
5345                                 const LogicVRegister& src1,
5346                                 const LogicVRegister& src2,
5347                                 int index) {
5348   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5349   dst.ClearForWrite(vform);
5350   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5351   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5352     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5353     float acc = dst.Float<float>(i);
5354     float result = FPMulAdd(acc, op1, op2);
5355     dst.SetFloat(i, result);
5356   }
5357   return dst;
5358 }
5359 
5360 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5361 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5362                                  LogicVRegister dst,
5363                                  const LogicVRegister& src1,
5364                                  const LogicVRegister& src2,
5365                                  int index) {
5366   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5367   dst.ClearForWrite(vform);
5368   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5369   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5370     int src = i + LaneCountFromFormat(vform);
5371     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5372     float acc = dst.Float<float>(i);
5373     float result = FPMulAdd(acc, op1, op2);
5374     dst.SetFloat(i, result);
5375   }
5376   return dst;
5377 }
5378 
5379 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5380 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5381                                 LogicVRegister dst,
5382                                 const LogicVRegister& src1,
5383                                 const LogicVRegister& src2,
5384                                 int index) {
5385   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5386   dst.ClearForWrite(vform);
5387   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5388   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5389     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5390     float acc = dst.Float<float>(i);
5391     float result = FPMulAdd(acc, op1, op2);
5392     dst.SetFloat(i, result);
5393   }
5394   return dst;
5395 }
5396 
5397 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5398 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5399                                  LogicVRegister dst,
5400                                  const LogicVRegister& src1,
5401                                  const LogicVRegister& src2,
5402                                  int index) {
5403   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5404   dst.ClearForWrite(vform);
5405   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5406   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5407     int src = i + LaneCountFromFormat(vform);
5408     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5409     float acc = dst.Float<float>(i);
5410     float result = FPMulAdd(acc, op1, op2);
5411     dst.SetFloat(i, result);
5412   }
5413   return dst;
5414 }
5415 
5416 
5417 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5418 LogicVRegister Simulator::fneg(VectorFormat vform,
5419                                LogicVRegister dst,
5420                                const LogicVRegister& src) {
5421   dst.ClearForWrite(vform);
5422   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5423     T op = src.Float<T>(i);
5424     op = -op;
5425     dst.SetFloat(i, op);
5426   }
5427   return dst;
5428 }
5429 
5430 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5431 LogicVRegister Simulator::fneg(VectorFormat vform,
5432                                LogicVRegister dst,
5433                                const LogicVRegister& src) {
5434   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5435     fneg<SimFloat16>(vform, dst, src);
5436   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5437     fneg<float>(vform, dst, src);
5438   } else {
5439     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5440     fneg<double>(vform, dst, src);
5441   }
5442   return dst;
5443 }
5444 
5445 
5446 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5447 LogicVRegister Simulator::fabs_(VectorFormat vform,
5448                                 LogicVRegister dst,
5449                                 const LogicVRegister& src) {
5450   dst.ClearForWrite(vform);
5451   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5452     T op = src.Float<T>(i);
5453     if (copysign(1.0, op) < 0.0) {
5454       op = -op;
5455     }
5456     dst.SetFloat(i, op);
5457   }
5458   return dst;
5459 }
5460 
5461 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5462 LogicVRegister Simulator::fabs_(VectorFormat vform,
5463                                 LogicVRegister dst,
5464                                 const LogicVRegister& src) {
5465   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5466     fabs_<SimFloat16>(vform, dst, src);
5467   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5468     fabs_<float>(vform, dst, src);
5469   } else {
5470     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5471     fabs_<double>(vform, dst, src);
5472   }
5473   return dst;
5474 }
5475 
5476 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5477 LogicVRegister Simulator::fabd(VectorFormat vform,
5478                                LogicVRegister dst,
5479                                const LogicVRegister& src1,
5480                                const LogicVRegister& src2) {
5481   SimVRegister temp;
5482   fsub(vform, temp, src1, src2);
5483   fabs_(vform, dst, temp);
5484   return dst;
5485 }
5486 
5487 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5488 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5489                                 LogicVRegister dst,
5490                                 const LogicVRegister& src) {
5491   dst.ClearForWrite(vform);
5492   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5493     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5494       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5495       dst.SetFloat(i, result);
5496     }
5497   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5498     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5499       float result = FPSqrt(src.Float<float>(i));
5500       dst.SetFloat(i, result);
5501     }
5502   } else {
5503     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5504     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5505       double result = FPSqrt(src.Float<double>(i));
5506       dst.SetFloat(i, result);
5507     }
5508   }
5509   return dst;
5510 }
5511 
5512 
// DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) defines two Simulator::FNP overloads:
//  - a two-source form, which runs uzp1 and uzp2 over (src1, src2) into
//    scratch registers, applies FN to those two results and, for SVE formats
//    only, finishes with interleave_top_bottom on dst;
//  - a single-source scalar form, which applies OP to lanes 0 and 1 of src in
//    the precision selected by vform (kFormatH, kFormatS or kFormatD), stores
//    the result in lane 0 of dst and then calls dst.ClearForWrite(vform).
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src1,                    \
                                const LogicVRegister& src2) {                  \
    SimVRegister uzp1_out, uzp2_out;                                           \
    uzp1(vform, uzp1_out, src1, src2);                                         \
    uzp2(vform, uzp2_out, src1, src2);                                         \
    FN(vform, dst, uzp1_out, uzp2_out);                                        \
    if (IsSVEFormat(vform)) {                                                  \
      interleave_top_bottom(vform, dst, dst);                                  \
    }                                                                          \
    return dst;                                                                \
  }                                                                            \
                                                                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src) {                   \
    switch (vform) {                                                           \
      case kFormatH: {                                                         \
        SimFloat16 lhs = SimFloat16(RawbitsToFloat16(src.Uint(vform, 0)));     \
        SimFloat16 rhs = SimFloat16(RawbitsToFloat16(src.Uint(vform, 1)));     \
        SimFloat16 result(OP(lhs, rhs));                                       \
        dst.SetUint(vform, 0, Float16ToRawbits(result));                       \
        break;                                                                 \
      }                                                                        \
      case kFormatS: {                                                         \
        float lhs = src.Float<float>(0);                                       \
        float rhs = src.Float<float>(1);                                       \
        float result = OP(lhs, rhs);                                           \
        dst.SetFloat(0, result);                                               \
        break;                                                                 \
      }                                                                        \
      default: {                                                               \
        VIXL_ASSERT(vform == kFormatD);                                        \
        double lhs = src.Float<double>(0);                                     \
        double rhs = src.Float<double>(1);                                     \
        double result = OP(lhs, rhs);                                          \
        dst.SetFloat(0, result);                                               \
        break;                                                                 \
      }                                                                        \
    }                                                                          \
    dst.ClearForWrite(vform);                                                  \
    return dst;                                                                \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5548 
5549 template <typename T>
5550 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5551                                                LogicVRegister dst,
5552                                                const LogicVRegister& src,
5553                                                typename TFPPairOp<T>::type fn,
5554                                                uint64_t inactive_value) {
5555   int lane_count = LaneCountFromFormat(vform);
5556   T result[kZRegMaxSizeInBytes / sizeof(T)];
5557   // Copy the source vector into a working array. Initialise the unused elements
5558   // at the end of the array to the same value that a false predicate would set.
5559   for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5560     result[i] = (i < lane_count)
5561                     ? src.Float<T>(i)
5562                     : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5563   }
5564 
5565   // Pairwise reduce the elements to a single value, using the pair op function
5566   // argument.
5567   for (int step = 1; step < lane_count; step *= 2) {
5568     for (int i = 0; i < lane_count; i += step * 2) {
5569       result[i] = (this->*fn)(result[i], result[i + step]);
5570     }
5571   }
5572   dst.ClearForWrite(ScalarFormatFromFormat(vform));
5573   dst.SetFloat<T>(0, result[0]);
5574   return dst;
5575 }
5576 
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5577 LogicVRegister Simulator::FPPairedAcrossHelper(
5578     VectorFormat vform,
5579     LogicVRegister dst,
5580     const LogicVRegister& src,
5581     typename TFPPairOp<SimFloat16>::type fn16,
5582     typename TFPPairOp<float>::type fn32,
5583     typename TFPPairOp<double>::type fn64,
5584     uint64_t inactive_value) {
5585   switch (LaneSizeInBitsFromFormat(vform)) {
5586     case kHRegSize:
5587       return FPPairedAcrossHelper<SimFloat16>(vform,
5588                                               dst,
5589                                               src,
5590                                               fn16,
5591                                               inactive_value);
5592     case kSRegSize:
5593       return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5594     default:
5595       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5596       return FPPairedAcrossHelper<double>(vform,
5597                                           dst,
5598                                           src,
5599                                           fn64,
5600                                           inactive_value);
5601   }
5602 }
5603 
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5604 LogicVRegister Simulator::faddv(VectorFormat vform,
5605                                 LogicVRegister dst,
5606                                 const LogicVRegister& src) {
5607   return FPPairedAcrossHelper(vform,
5608                               dst,
5609                               src,
5610                               &Simulator::FPAdd<SimFloat16>,
5611                               &Simulator::FPAdd<float>,
5612                               &Simulator::FPAdd<double>,
5613                               0);
5614 }
5615 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5616 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5617                                 LogicVRegister dst,
5618                                 const LogicVRegister& src) {
5619   int lane_size = LaneSizeInBitsFromFormat(vform);
5620   uint64_t inactive_value =
5621       FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5622   return FPPairedAcrossHelper(vform,
5623                               dst,
5624                               src,
5625                               &Simulator::FPMax<SimFloat16>,
5626                               &Simulator::FPMax<float>,
5627                               &Simulator::FPMax<double>,
5628                               inactive_value);
5629 }
5630 
5631 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5632 LogicVRegister Simulator::fminv(VectorFormat vform,
5633                                 LogicVRegister dst,
5634                                 const LogicVRegister& src) {
5635   int lane_size = LaneSizeInBitsFromFormat(vform);
5636   uint64_t inactive_value =
5637       FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5638   return FPPairedAcrossHelper(vform,
5639                               dst,
5640                               src,
5641                               &Simulator::FPMin<SimFloat16>,
5642                               &Simulator::FPMin<float>,
5643                               &Simulator::FPMin<double>,
5644                               inactive_value);
5645 }
5646 
5647 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5648 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5649                                   LogicVRegister dst,
5650                                   const LogicVRegister& src) {
5651   int lane_size = LaneSizeInBitsFromFormat(vform);
5652   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5653   return FPPairedAcrossHelper(vform,
5654                               dst,
5655                               src,
5656                               &Simulator::FPMaxNM<SimFloat16>,
5657                               &Simulator::FPMaxNM<float>,
5658                               &Simulator::FPMaxNM<double>,
5659                               inactive_value);
5660 }
5661 
5662 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5663 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5664                                   LogicVRegister dst,
5665                                   const LogicVRegister& src) {
5666   int lane_size = LaneSizeInBitsFromFormat(vform);
5667   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5668   return FPPairedAcrossHelper(vform,
5669                               dst,
5670                               src,
5671                               &Simulator::FPMinNM<SimFloat16>,
5672                               &Simulator::FPMinNM<float>,
5673                               &Simulator::FPMinNM<double>,
5674                               inactive_value);
5675 }
5676 
5677 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5678 LogicVRegister Simulator::fmul(VectorFormat vform,
5679                                LogicVRegister dst,
5680                                const LogicVRegister& src1,
5681                                const LogicVRegister& src2,
5682                                int index) {
5683   dst.ClearForWrite(vform);
5684   SimVRegister temp;
5685   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5686     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5687     fmul<SimFloat16>(vform, dst, src1, index_reg);
5688   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5689     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5690     fmul<float>(vform, dst, src1, index_reg);
5691   } else {
5692     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5693     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5694     fmul<double>(vform, dst, src1, index_reg);
5695   }
5696   return dst;
5697 }
5698 
5699 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5700 LogicVRegister Simulator::fmla(VectorFormat vform,
5701                                LogicVRegister dst,
5702                                const LogicVRegister& src1,
5703                                const LogicVRegister& src2,
5704                                int index) {
5705   dst.ClearForWrite(vform);
5706   SimVRegister temp;
5707   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5708     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5709     fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5710   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5711     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5712     fmla<float>(vform, dst, dst, src1, index_reg);
5713   } else {
5714     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5715     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5716     fmla<double>(vform, dst, dst, src1, index_reg);
5717   }
5718   return dst;
5719 }
5720 
5721 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5722 LogicVRegister Simulator::fmls(VectorFormat vform,
5723                                LogicVRegister dst,
5724                                const LogicVRegister& src1,
5725                                const LogicVRegister& src2,
5726                                int index) {
5727   dst.ClearForWrite(vform);
5728   SimVRegister temp;
5729   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5730     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5731     fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5732   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5733     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5734     fmls<float>(vform, dst, dst, src1, index_reg);
5735   } else {
5736     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5737     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5738     fmls<double>(vform, dst, dst, src1, index_reg);
5739   }
5740   return dst;
5741 }
5742 
5743 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5744 LogicVRegister Simulator::fmulx(VectorFormat vform,
5745                                 LogicVRegister dst,
5746                                 const LogicVRegister& src1,
5747                                 const LogicVRegister& src2,
5748                                 int index) {
5749   dst.ClearForWrite(vform);
5750   SimVRegister temp;
5751   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5752     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5753     fmulx<SimFloat16>(vform, dst, src1, index_reg);
5754   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5755     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5756     fmulx<float>(vform, dst, src1, index_reg);
5757   } else {
5758     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5759     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5760     fmulx<double>(vform, dst, src1, index_reg);
5761   }
5762   return dst;
5763 }
5764 
5765 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5766 LogicVRegister Simulator::frint(VectorFormat vform,
5767                                 LogicVRegister dst,
5768                                 const LogicVRegister& src,
5769                                 FPRounding rounding_mode,
5770                                 bool inexact_exception,
5771                                 FrintMode frint_mode) {
5772   dst.ClearForWrite(vform);
5773   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5774     VIXL_ASSERT(frint_mode == kFrintToInteger);
5775     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5776       SimFloat16 input = src.Float<SimFloat16>(i);
5777       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5778       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5779         FPProcessException();
5780       }
5781       dst.SetFloat<SimFloat16>(i, rounded);
5782     }
5783   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5784     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5785       float input = src.Float<float>(i);
5786       float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5787 
5788       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5789         FPProcessException();
5790       }
5791       dst.SetFloat<float>(i, rounded);
5792     }
5793   } else {
5794     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5795     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5796       double input = src.Float<double>(i);
5797       double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5798       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5799         FPProcessException();
5800       }
5801       dst.SetFloat<double>(i, rounded);
5802     }
5803   }
5804   return dst;
5805 }
5806 
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5807 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5808                                VectorFormat src_vform,
5809                                LogicVRegister dst,
5810                                const LogicPRegister& pg,
5811                                const LogicVRegister& src) {
5812   unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5813   unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5814   VectorFormat vform = SVEFormatFromLaneSizeInBits(
5815       std::max(dst_data_size_in_bits, src_data_size_in_bits));
5816 
5817   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5818     if (!pg.IsActive(vform, i)) continue;
5819 
5820     uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5821                                                       0,
5822                                                       src.Uint(vform, i));
5823     double dst_value =
5824         RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5825 
5826     uint64_t dst_raw_bits =
5827         FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5828 
5829     dst.SetUint(vform, i, dst_raw_bits);
5830   }
5831 
5832   return dst;
5833 }
5834 
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5835 LogicVRegister Simulator::fcvts(VectorFormat vform,
5836                                 unsigned dst_data_size_in_bits,
5837                                 unsigned src_data_size_in_bits,
5838                                 LogicVRegister dst,
5839                                 const LogicPRegister& pg,
5840                                 const LogicVRegister& src,
5841                                 FPRounding round,
5842                                 int fbits) {
5843   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5844   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5845 
5846   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5847     if (!pg.IsActive(vform, i)) continue;
5848 
5849     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5850                                                0,
5851                                                src.Uint(vform, i));
5852     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5853                     std::pow(2.0, fbits);
5854 
5855     switch (dst_data_size_in_bits) {
5856       case kHRegSize:
5857         dst.SetInt(vform, i, FPToInt16(result, round));
5858         break;
5859       case kSRegSize:
5860         dst.SetInt(vform, i, FPToInt32(result, round));
5861         break;
5862       case kDRegSize:
5863         dst.SetInt(vform, i, FPToInt64(result, round));
5864         break;
5865       default:
5866         VIXL_UNIMPLEMENTED();
5867         break;
5868     }
5869   }
5870 
5871   return dst;
5872 }
5873 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5874 LogicVRegister Simulator::fcvts(VectorFormat vform,
5875                                 LogicVRegister dst,
5876                                 const LogicVRegister& src,
5877                                 FPRounding round,
5878                                 int fbits) {
5879   dst.ClearForWrite(vform);
5880   return fcvts(vform,
5881                LaneSizeInBitsFromFormat(vform),
5882                LaneSizeInBitsFromFormat(vform),
5883                dst,
5884                GetPTrue(),
5885                src,
5886                round,
5887                fbits);
5888 }
5889 
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5890 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5891                                 unsigned dst_data_size_in_bits,
5892                                 unsigned src_data_size_in_bits,
5893                                 LogicVRegister dst,
5894                                 const LogicPRegister& pg,
5895                                 const LogicVRegister& src,
5896                                 FPRounding round,
5897                                 int fbits) {
5898   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5899   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5900 
5901   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5902     if (!pg.IsActive(vform, i)) continue;
5903 
5904     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5905                                                0,
5906                                                src.Uint(vform, i));
5907     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5908                     std::pow(2.0, fbits);
5909 
5910     switch (dst_data_size_in_bits) {
5911       case kHRegSize:
5912         dst.SetUint(vform, i, FPToUInt16(result, round));
5913         break;
5914       case kSRegSize:
5915         dst.SetUint(vform, i, FPToUInt32(result, round));
5916         break;
5917       case kDRegSize:
5918         dst.SetUint(vform, i, FPToUInt64(result, round));
5919         break;
5920       default:
5921         VIXL_UNIMPLEMENTED();
5922         break;
5923     }
5924   }
5925 
5926   return dst;
5927 }
5928 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5929 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5930                                 LogicVRegister dst,
5931                                 const LogicVRegister& src,
5932                                 FPRounding round,
5933                                 int fbits) {
5934   dst.ClearForWrite(vform);
5935   return fcvtu(vform,
5936                LaneSizeInBitsFromFormat(vform),
5937                LaneSizeInBitsFromFormat(vform),
5938                dst,
5939                GetPTrue(),
5940                src,
5941                round,
5942                fbits);
5943 }
5944 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5945 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5946                                 LogicVRegister dst,
5947                                 const LogicVRegister& src) {
5948   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5949     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5950       // TODO: Full support for SimFloat16 in SimRegister(s).
5951       dst.SetFloat(i,
5952                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5953                              ReadDN()));
5954     }
5955   } else {
5956     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5957     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5958       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5959     }
5960   }
5961   return dst;
5962 }
5963 
5964 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5965 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5966                                  LogicVRegister dst,
5967                                  const LogicVRegister& src) {
5968   int lane_count = LaneCountFromFormat(vform);
5969   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5970     for (int i = 0; i < lane_count; i++) {
5971       // TODO: Full support for SimFloat16 in SimRegister(s).
5972       dst.SetFloat(i,
5973                    FPToFloat(RawbitsToFloat16(
5974                                  src.Float<uint16_t>(i + lane_count)),
5975                              ReadDN()));
5976     }
5977   } else {
5978     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5979     for (int i = 0; i < lane_count; i++) {
5980       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5981     }
5982   }
5983   return dst;
5984 }
5985 
5986 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5987 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5988                                 LogicVRegister dst,
5989                                 const LogicVRegister& src) {
5990   SimVRegister tmp;
5991   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
5992   dst.ClearForWrite(vform);
5993   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5994     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5995       dst.SetFloat(i,
5996                    Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
5997                                                 FPTieEven,
5998                                                 ReadDN())));
5999     }
6000   } else {
6001     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6002     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6003       dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6004     }
6005   }
6006   return dst;
6007 }
6008 
6009 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6010 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6011                                  LogicVRegister dst,
6012                                  const LogicVRegister& src) {
6013   int lane_count = LaneCountFromFormat(vform) / 2;
6014   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6015     for (int i = lane_count - 1; i >= 0; i--) {
6016       dst.SetFloat(i + lane_count,
6017                    Float16ToRawbits(
6018                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6019     }
6020   } else {
6021     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6022     for (int i = lane_count - 1; i >= 0; i--) {
6023       dst.SetFloat(i + lane_count,
6024                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6025     }
6026   }
6027   return dst;
6028 }
6029 
6030 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6031 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6032                                  LogicVRegister dst,
6033                                  const LogicVRegister& src) {
6034   SimVRegister tmp;
6035   LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6036   int input_lane_count = LaneCountFromFormat(vform);
6037   if (IsSVEFormat(vform)) {
6038     mov(kFormatVnB, tmp, src);
6039     input_lane_count /= 2;
6040   }
6041 
6042   dst.ClearForWrite(vform);
6043   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6044 
6045   for (int i = 0; i < input_lane_count; i++) {
6046     dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6047   }
6048   return dst;
6049 }
6050 
6051 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6052 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6053                                   LogicVRegister dst,
6054                                   const LogicVRegister& src) {
6055   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6056   int lane_count = LaneCountFromFormat(vform) / 2;
6057   for (int i = lane_count - 1; i >= 0; i--) {
6058     dst.SetFloat(i + lane_count,
6059                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6060   }
6061   return dst;
6062 }
6063 
6064 
6065 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6066 double Simulator::recip_sqrt_estimate(double a) {
6067   int quot0, quot1, s;
6068   double r;
6069   if (a < 0.5) {
6070     quot0 = static_cast<int>(a * 512.0);
6071     r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6072   } else {
6073     quot1 = static_cast<int>(a * 256.0);
6074     r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6075   }
6076   s = static_cast<int>(256.0 * r + 0.5);
6077   return static_cast<double>(s) / 256.0;
6078 }
6079 
6080 
Bits(uint64_t val,int start_bit,int end_bit)6081 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6082   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6083 }
6084 
6085 
6086 template <typename T>
FPRecipSqrtEstimate(T op)6087 T Simulator::FPRecipSqrtEstimate(T op) {
6088   if (IsNaN(op)) {
6089     return FPProcessNaN(op);
6090   } else if (op == 0.0) {
6091     if (copysign(1.0, op) < 0.0) {
6092       return kFP64NegativeInfinity;
6093     } else {
6094       return kFP64PositiveInfinity;
6095     }
6096   } else if (copysign(1.0, op) < 0.0) {
6097     FPProcessException();
6098     return FPDefaultNaN<T>();
6099   } else if (IsInf(op)) {
6100     return 0.0;
6101   } else {
6102     uint64_t fraction;
6103     int exp, result_exp;
6104 
6105     if (IsFloat16<T>()) {
6106       exp = Float16Exp(op);
6107       fraction = Float16Mantissa(op);
6108       fraction <<= 42;
6109     } else if (IsFloat32<T>()) {
6110       exp = FloatExp(op);
6111       fraction = FloatMantissa(op);
6112       fraction <<= 29;
6113     } else {
6114       VIXL_ASSERT(IsFloat64<T>());
6115       exp = DoubleExp(op);
6116       fraction = DoubleMantissa(op);
6117     }
6118 
6119     if (exp == 0) {
6120       while (Bits(fraction, 51, 51) == 0) {
6121         fraction = Bits(fraction, 50, 0) << 1;
6122         exp -= 1;
6123       }
6124       fraction = Bits(fraction, 50, 0) << 1;
6125     }
6126 
6127     double scaled;
6128     if (Bits(exp, 0, 0) == 0) {
6129       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6130     } else {
6131       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6132     }
6133 
6134     if (IsFloat16<T>()) {
6135       result_exp = (44 - exp) / 2;
6136     } else if (IsFloat32<T>()) {
6137       result_exp = (380 - exp) / 2;
6138     } else {
6139       VIXL_ASSERT(IsFloat64<T>());
6140       result_exp = (3068 - exp) / 2;
6141     }
6142 
6143     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6144 
6145     if (IsFloat16<T>()) {
6146       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6147       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6148       return Float16Pack(0, exp_bits, est_bits);
6149     } else if (IsFloat32<T>()) {
6150       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6151       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6152       return FloatPack(0, exp_bits, est_bits);
6153     } else {
6154       VIXL_ASSERT(IsFloat64<T>());
6155       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6156     }
6157   }
6158 }
6159 
6160 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6161 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6162                                   LogicVRegister dst,
6163                                   const LogicVRegister& src) {
6164   dst.ClearForWrite(vform);
6165   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6166     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6167       SimFloat16 input = src.Float<SimFloat16>(i);
6168       dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6169     }
6170   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6171     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6172       float input = src.Float<float>(i);
6173       dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6174     }
6175   } else {
6176     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6177     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6178       double input = src.Float<double>(i);
6179       dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6180     }
6181   }
6182   return dst;
6183 }
6184 
6185 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6186 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6187   uint32_t sign;
6188 
6189   if (IsFloat16<T>()) {
6190     sign = Float16Sign(op);
6191   } else if (IsFloat32<T>()) {
6192     sign = FloatSign(op);
6193   } else {
6194     VIXL_ASSERT(IsFloat64<T>());
6195     sign = DoubleSign(op);
6196   }
6197 
6198   if (IsNaN(op)) {
6199     return FPProcessNaN(op);
6200   } else if (IsInf(op)) {
6201     return (sign == 1) ? -0.0 : 0.0;
6202   } else if (op == 0.0) {
6203     FPProcessException();  // FPExc_DivideByZero exception.
6204     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6205   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6206              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6207              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6208     bool overflow_to_inf = false;
6209     switch (rounding) {
6210       case FPTieEven:
6211         overflow_to_inf = true;
6212         break;
6213       case FPPositiveInfinity:
6214         overflow_to_inf = (sign == 0);
6215         break;
6216       case FPNegativeInfinity:
6217         overflow_to_inf = (sign == 1);
6218         break;
6219       case FPZero:
6220         overflow_to_inf = false;
6221         break;
6222       default:
6223         break;
6224     }
6225     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6226     if (overflow_to_inf) {
6227       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6228     } else {
6229       // Return FPMaxNormal(sign).
6230       if (IsFloat16<T>()) {
6231         return Float16Pack(sign, 0x1f, 0x3ff);
6232       } else if (IsFloat32<T>()) {
6233         return FloatPack(sign, 0xfe, 0x07fffff);
6234       } else {
6235         VIXL_ASSERT(IsFloat64<T>());
6236         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6237       }
6238     }
6239   } else {
6240     uint64_t fraction;
6241     int exp, result_exp;
6242 
6243     if (IsFloat16<T>()) {
6244       sign = Float16Sign(op);
6245       exp = Float16Exp(op);
6246       fraction = Float16Mantissa(op);
6247       fraction <<= 42;
6248     } else if (IsFloat32<T>()) {
6249       sign = FloatSign(op);
6250       exp = FloatExp(op);
6251       fraction = FloatMantissa(op);
6252       fraction <<= 29;
6253     } else {
6254       VIXL_ASSERT(IsFloat64<T>());
6255       sign = DoubleSign(op);
6256       exp = DoubleExp(op);
6257       fraction = DoubleMantissa(op);
6258     }
6259 
6260     if (exp == 0) {
6261       if (Bits(fraction, 51, 51) == 0) {
6262         exp -= 1;
6263         fraction = Bits(fraction, 49, 0) << 2;
6264       } else {
6265         fraction = Bits(fraction, 50, 0) << 1;
6266       }
6267     }
6268 
6269     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6270 
6271     if (IsFloat16<T>()) {
6272       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6273     } else if (IsFloat32<T>()) {
6274       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6275     } else {
6276       VIXL_ASSERT(IsFloat64<T>());
6277       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6278     }
6279 
6280     double estimate = recip_estimate(scaled);
6281 
6282     fraction = DoubleMantissa(estimate);
6283     if (result_exp == 0) {
6284       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6285     } else if (result_exp == -1) {
6286       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6287       result_exp = 0;
6288     }
6289     if (IsFloat16<T>()) {
6290       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6291       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6292       return Float16Pack(sign, exp_bits, frac_bits);
6293     } else if (IsFloat32<T>()) {
6294       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6295       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6296       return FloatPack(sign, exp_bits, frac_bits);
6297     } else {
6298       VIXL_ASSERT(IsFloat64<T>());
6299       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6300     }
6301   }
6302 }
6303 
6304 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6305 LogicVRegister Simulator::frecpe(VectorFormat vform,
6306                                  LogicVRegister dst,
6307                                  const LogicVRegister& src,
6308                                  FPRounding round) {
6309   dst.ClearForWrite(vform);
6310   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6311     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6312       SimFloat16 input = src.Float<SimFloat16>(i);
6313       dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6314     }
6315   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6316     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6317       float input = src.Float<float>(i);
6318       dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6319     }
6320   } else {
6321     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6322     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6323       double input = src.Float<double>(i);
6324       dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6325     }
6326   }
6327   return dst;
6328 }
6329 
6330 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6331 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6332                                   LogicVRegister dst,
6333                                   const LogicVRegister& src) {
6334   dst.ClearForWrite(vform);
6335   uint64_t operand;
6336   uint32_t result;
6337   double dp_operand, dp_result;
6338   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6339     operand = src.Uint(vform, i);
6340     if (operand <= 0x3FFFFFFF) {
6341       result = 0xFFFFFFFF;
6342     } else {
6343       dp_operand = operand * std::pow(2.0, -32);
6344       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6345       result = static_cast<uint32_t>(dp_result);
6346     }
6347     dst.SetUint(vform, i, result);
6348   }
6349   return dst;
6350 }
6351 
6352 
6353 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6354 double Simulator::recip_estimate(double a) {
6355   int q, s;
6356   double r;
6357   q = static_cast<int>(a * 512.0);
6358   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6359   s = static_cast<int>(256.0 * r + 0.5);
6360   return static_cast<double>(s) / 256.0;
6361 }
6362 
6363 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6364 LogicVRegister Simulator::urecpe(VectorFormat vform,
6365                                  LogicVRegister dst,
6366                                  const LogicVRegister& src) {
6367   dst.ClearForWrite(vform);
6368   uint64_t operand;
6369   uint32_t result;
6370   double dp_operand, dp_result;
6371   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6372     operand = src.Uint(vform, i);
6373     if (operand <= 0x7FFFFFFF) {
6374       result = 0xFFFFFFFF;
6375     } else {
6376       dp_operand = operand * std::pow(2.0, -32);
6377       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6378       result = static_cast<uint32_t>(dp_result);
6379     }
6380     dst.SetUint(vform, i, result);
6381   }
6382   return dst;
6383 }
6384 
pfalse(LogicPRegister dst)6385 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6386   dst.Clear();
6387   return dst;
6388 }
6389 
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6390 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6391                                  const LogicPRegister& pg,
6392                                  const LogicPRegister& src) {
6393   int first_pg = GetFirstActive(kFormatVnB, pg);
6394   VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6395   mov(dst, src);
6396   if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6397   return dst;
6398 }
6399 
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6400 LogicPRegister Simulator::ptrue(VectorFormat vform,
6401                                 LogicPRegister dst,
6402                                 int pattern) {
6403   int count = GetPredicateConstraintLaneCount(vform, pattern);
6404   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6405     dst.SetActive(vform, i, i < count);
6406   }
6407   return dst;
6408 }
6409 
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6410 LogicPRegister Simulator::pnext(VectorFormat vform,
6411                                 LogicPRegister dst,
6412                                 const LogicPRegister& pg,
6413                                 const LogicPRegister& src) {
6414   int next = GetLastActive(vform, src) + 1;
6415   while (next < LaneCountFromFormat(vform)) {
6416     if (pg.IsActive(vform, next)) break;
6417     next++;
6418   }
6419 
6420   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6421     dst.SetActive(vform, i, (i == next));
6422   }
6423   return dst;
6424 }
6425 
6426 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6427 LogicVRegister Simulator::frecpx(VectorFormat vform,
6428                                  LogicVRegister dst,
6429                                  const LogicVRegister& src) {
6430   dst.ClearForWrite(vform);
6431   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6432     T op = src.Float<T>(i);
6433     T result;
6434     if (IsNaN(op)) {
6435       result = FPProcessNaN(op);
6436     } else {
6437       int exp;
6438       uint32_t sign;
6439       if (IsFloat16<T>()) {
6440         sign = Float16Sign(op);
6441         exp = Float16Exp(op);
6442         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6443         result = Float16Pack(sign, exp, 0);
6444       } else if (IsFloat32<T>()) {
6445         sign = FloatSign(op);
6446         exp = FloatExp(op);
6447         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6448         result = FloatPack(sign, exp, 0);
6449       } else {
6450         VIXL_ASSERT(IsFloat64<T>());
6451         sign = DoubleSign(op);
6452         exp = DoubleExp(op);
6453         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6454         result = DoublePack(sign, exp, 0);
6455       }
6456     }
6457     dst.SetFloat(i, result);
6458   }
6459   return dst;
6460 }
6461 
6462 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6463 LogicVRegister Simulator::frecpx(VectorFormat vform,
6464                                  LogicVRegister dst,
6465                                  const LogicVRegister& src) {
6466   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6467     frecpx<SimFloat16>(vform, dst, src);
6468   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6469     frecpx<float>(vform, dst, src);
6470   } else {
6471     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6472     frecpx<double>(vform, dst, src);
6473   }
6474   return dst;
6475 }
6476 
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6477 LogicVRegister Simulator::flogb(VectorFormat vform,
6478                                 LogicVRegister dst,
6479                                 const LogicVRegister& src) {
6480   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6481     double op = 0.0;
6482     switch (vform) {
6483       case kFormatVnH:
6484         op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6485         break;
6486       case kFormatVnS:
6487         op = src.Float<float>(i);
6488         break;
6489       case kFormatVnD:
6490         op = src.Float<double>(i);
6491         break;
6492       default:
6493         VIXL_UNREACHABLE();
6494     }
6495 
6496     switch (std::fpclassify(op)) {
6497       case FP_INFINITE:
6498         dst.SetInt(vform, i, MaxIntFromFormat(vform));
6499         break;
6500       case FP_NAN:
6501       case FP_ZERO:
6502         dst.SetInt(vform, i, MinIntFromFormat(vform));
6503         break;
6504       case FP_SUBNORMAL: {
6505         // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6506         // bits where the sign and exponent would be. We subtract 12 to
6507         // find the number of leading zero bits in the mantissa itself.
6508         int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6509         // Log2 of a subnormal is the lowest exponent a normal number can
6510         // represent, together with the zeros in the mantissa.
6511         dst.SetInt(vform, i, -1023 - mant_zero_count);
6512         break;
6513       }
6514       case FP_NORMAL:
6515         // Log2 of a normal number is the exponent minus the bias.
6516         dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6517         break;
6518     }
6519   }
6520   return dst;
6521 }
6522 
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6523 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6524                                  LogicVRegister dst,
6525                                  const LogicVRegister& src1,
6526                                  const LogicVRegister& src2) {
6527   SimVRegister maybe_neg_src1;
6528 
6529   // The bottom bit of src2 controls the sign of the result. Use it to
6530   // conditionally invert the sign of one `fmul` operand.
6531   shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6532   eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6533 
6534   // Multiply src1 by the modified neg_src1, which is potentially its negation.
6535   // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6536   // rather than neg_src1, must be the first source argument.
6537   fmul(vform, dst, src1, maybe_neg_src1);
6538 
6539   return dst;
6540 }
6541 
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6542 LogicVRegister Simulator::ftssel(VectorFormat vform,
6543                                  LogicVRegister dst,
6544                                  const LogicVRegister& src1,
6545                                  const LogicVRegister& src2) {
6546   unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6547   uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6548   uint64_t one;
6549 
6550   if (lane_bits == kHRegSize) {
6551     one = Float16ToRawbits(Float16(1.0));
6552   } else if (lane_bits == kSRegSize) {
6553     one = FloatToRawbits(1.0);
6554   } else {
6555     VIXL_ASSERT(lane_bits == kDRegSize);
6556     one = DoubleToRawbits(1.0);
6557   }
6558 
6559   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6560     // Use integer accessors for this operation, as this is a data manipulation
6561     // task requiring no calculation.
6562     uint64_t op = src1.Uint(vform, i);
6563 
6564     // Only the bottom two bits of the src2 register are significant, indicating
6565     // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6566     // determines the sign of the value written to dst.
6567     uint64_t q = src2.Uint(vform, i);
6568     if ((q & 1) == 1) op = one;
6569     if ((q & 2) == 2) op ^= sign_bit;
6570 
6571     dst.SetUint(vform, i, op);
6572   }
6573 
6574   return dst;
6575 }
6576 
6577 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6578 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6579                                        LogicVRegister dst,
6580                                        const LogicVRegister& src1,
6581                                        const LogicVRegister& src2,
6582                                        uint64_t coeff_pos,
6583                                        uint64_t coeff_neg) {
6584   SimVRegister zero;
6585   dup_immediate(kFormatVnB, zero, 0);
6586 
6587   SimVRegister cf;
6588   SimVRegister cfn;
6589   dup_immediate(vform, cf, coeff_pos);
6590   dup_immediate(vform, cfn, coeff_neg);
6591 
6592   // The specification requires testing the top bit of the raw value, rather
6593   // than the sign of the floating point number, so use an integer comparison
6594   // here.
6595   SimPRegister is_neg;
6596   SVEIntCompareVectorsHelper(lt,
6597                              vform,
6598                              is_neg,
6599                              GetPTrue(),
6600                              src2,
6601                              zero,
6602                              false,
6603                              LeaveFlags);
6604   mov_merging(vform, cf, is_neg, cfn);
6605 
6606   SimVRegister temp;
6607   fabs_<T>(vform, temp, src2);
6608   fmla<T>(vform, cf, cf, src1, temp);
6609   mov(vform, dst, cf);
6610   return dst;
6611 }
6612 
6613 
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6614 LogicVRegister Simulator::ftmad(VectorFormat vform,
6615                                 LogicVRegister dst,
6616                                 const LogicVRegister& src1,
6617                                 const LogicVRegister& src2,
6618                                 unsigned index) {
6619   static const uint64_t ftmad_coeff16[] = {0x3c00,
6620                                            0xb155,
6621                                            0x2030,
6622                                            0x0000,
6623                                            0x0000,
6624                                            0x0000,
6625                                            0x0000,
6626                                            0x0000,
6627                                            0x3c00,
6628                                            0xb800,
6629                                            0x293a,
6630                                            0x0000,
6631                                            0x0000,
6632                                            0x0000,
6633                                            0x0000,
6634                                            0x0000};
6635 
6636   static const uint64_t ftmad_coeff32[] = {0x3f800000,
6637                                            0xbe2aaaab,
6638                                            0x3c088886,
6639                                            0xb95008b9,
6640                                            0x36369d6d,
6641                                            0x00000000,
6642                                            0x00000000,
6643                                            0x00000000,
6644                                            0x3f800000,
6645                                            0xbf000000,
6646                                            0x3d2aaaa6,
6647                                            0xbab60705,
6648                                            0x37cd37cc,
6649                                            0x00000000,
6650                                            0x00000000,
6651                                            0x00000000};
6652 
6653   static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6654                                            0xbfc5555555555543,
6655                                            0x3f8111111110f30c,
6656                                            0xbf2a01a019b92fc6,
6657                                            0x3ec71de351f3d22b,
6658                                            0xbe5ae5e2b60f7b91,
6659                                            0x3de5d8408868552f,
6660                                            0x0000000000000000,
6661                                            0x3ff0000000000000,
6662                                            0xbfe0000000000000,
6663                                            0x3fa5555555555536,
6664                                            0xbf56c16c16c13a0b,
6665                                            0x3efa01a019b1e8d8,
6666                                            0xbe927e4f7282f468,
6667                                            0x3e21ee96d2641b13,
6668                                            0xbda8f76380fbb401};
6669   VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6670   VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6671   VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6672 
6673   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6674     FTMaddHelper<SimFloat16>(vform,
6675                              dst,
6676                              src1,
6677                              src2,
6678                              ftmad_coeff16[index],
6679                              ftmad_coeff16[index + 8]);
6680   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6681     FTMaddHelper<float>(vform,
6682                         dst,
6683                         src1,
6684                         src2,
6685                         ftmad_coeff32[index],
6686                         ftmad_coeff32[index + 8]);
6687   } else {
6688     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6689     FTMaddHelper<double>(vform,
6690                          dst,
6691                          src1,
6692                          src2,
6693                          ftmad_coeff64[index],
6694                          ftmad_coeff64[index + 8]);
6695   }
6696   return dst;
6697 }
6698 
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6699 LogicVRegister Simulator::fexpa(VectorFormat vform,
6700                                 LogicVRegister dst,
6701                                 const LogicVRegister& src) {
6702   static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6703                                            0x005d, 0x0075, 0x008e, 0x00a8,
6704                                            0x00c2, 0x00dc, 0x00f8, 0x0114,
6705                                            0x0130, 0x014d, 0x016b, 0x0189,
6706                                            0x01a8, 0x01c8, 0x01e8, 0x0209,
6707                                            0x022b, 0x024e, 0x0271, 0x0295,
6708                                            0x02ba, 0x02e0, 0x0306, 0x032e,
6709                                            0x0356, 0x037f, 0x03a9, 0x03d4};
6710 
6711   static const uint64_t fexpa_coeff32[] =
6712       {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6713        0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6714        0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6715        0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6716        0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6717        0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6718        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6719        0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6720        0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6721        0x7d3e0c};
6722 
6723   static const uint64_t fexpa_coeff64[] =
6724       {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6725        0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6726        0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6727        0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6728        0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6729        0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6730        0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6731        0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6732        0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6733        0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6734        0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6735        0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6736        0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6737        0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6738        0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6739        0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6740 
6741   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6742   int index_highbit = 5;
6743   int op_highbit, op_shift;
6744   const uint64_t* fexpa_coeff;
6745 
6746   if (lane_size == kHRegSize) {
6747     index_highbit = 4;
6748     VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6749     fexpa_coeff = fexpa_coeff16;
6750     op_highbit = 9;
6751     op_shift = 10;
6752   } else if (lane_size == kSRegSize) {
6753     VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6754     fexpa_coeff = fexpa_coeff32;
6755     op_highbit = 13;
6756     op_shift = 23;
6757   } else {
6758     VIXL_ASSERT(lane_size == kDRegSize);
6759     VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6760     fexpa_coeff = fexpa_coeff64;
6761     op_highbit = 16;
6762     op_shift = 52;
6763   }
6764 
6765   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6766     uint64_t op = src.Uint(vform, i);
6767     uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6768     result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6769     dst.SetUint(vform, i, result);
6770   }
6771   return dst;
6772 }
6773 
6774 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6775 LogicVRegister Simulator::fscale(VectorFormat vform,
6776                                  LogicVRegister dst,
6777                                  const LogicVRegister& src1,
6778                                  const LogicVRegister& src2) {
6779   T two = T(2.0);
6780   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6781     T src1_val = src1.Float<T>(i);
6782     if (!IsNaN(src1_val)) {
6783       int64_t scale = src2.Int(vform, i);
6784       // TODO: this is a low-performance implementation, but it's simple and
6785       // less likely to be buggy. Consider replacing it with something faster.
6786 
6787       // Scales outside of these bounds become infinity or zero, so there's no
6788       // point iterating further.
6789       scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6790 
6791       // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6792       // decrement scale until it's zero.
6793       while (scale-- > 0) {
6794         src1_val = FPMul(src1_val, two);
6795       }
6796 
6797       // If scale is negative, divide by two and increment scale until it's
6798       // zero. Initially, scale is (src2 - 1), so we pre-increment.
6799       while (++scale < 0) {
6800         src1_val = FPDiv(src1_val, two);
6801       }
6802     }
6803     dst.SetFloat<T>(i, src1_val);
6804   }
6805   return dst;
6806 }
6807 
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6808 LogicVRegister Simulator::fscale(VectorFormat vform,
6809                                  LogicVRegister dst,
6810                                  const LogicVRegister& src1,
6811                                  const LogicVRegister& src2) {
6812   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6813     fscale<SimFloat16>(vform, dst, src1, src2);
6814   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6815     fscale<float>(vform, dst, src1, src2);
6816   } else {
6817     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6818     fscale<double>(vform, dst, src1, src2);
6819   }
6820   return dst;
6821 }
6822 
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6823 LogicVRegister Simulator::scvtf(VectorFormat vform,
6824                                 unsigned dst_data_size_in_bits,
6825                                 unsigned src_data_size_in_bits,
6826                                 LogicVRegister dst,
6827                                 const LogicPRegister& pg,
6828                                 const LogicVRegister& src,
6829                                 FPRounding round,
6830                                 int fbits) {
6831   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6832   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6833   dst.ClearForWrite(vform);
6834 
6835   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6836     if (!pg.IsActive(vform, i)) continue;
6837 
6838     int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6839                                             0,
6840                                             src.Uint(vform, i));
6841 
6842     switch (dst_data_size_in_bits) {
6843       case kHRegSize: {
6844         SimFloat16 result = FixedToFloat16(value, fbits, round);
6845         dst.SetUint(vform, i, Float16ToRawbits(result));
6846         break;
6847       }
6848       case kSRegSize: {
6849         float result = FixedToFloat(value, fbits, round);
6850         dst.SetUint(vform, i, FloatToRawbits(result));
6851         break;
6852       }
6853       case kDRegSize: {
6854         double result = FixedToDouble(value, fbits, round);
6855         dst.SetUint(vform, i, DoubleToRawbits(result));
6856         break;
6857       }
6858       default:
6859         VIXL_UNIMPLEMENTED();
6860         break;
6861     }
6862   }
6863 
6864   return dst;
6865 }
6866 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6867 LogicVRegister Simulator::scvtf(VectorFormat vform,
6868                                 LogicVRegister dst,
6869                                 const LogicVRegister& src,
6870                                 int fbits,
6871                                 FPRounding round) {
6872   return scvtf(vform,
6873                LaneSizeInBitsFromFormat(vform),
6874                LaneSizeInBitsFromFormat(vform),
6875                dst,
6876                GetPTrue(),
6877                src,
6878                round,
6879                fbits);
6880 }
6881 
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6882 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6883                                 unsigned dst_data_size_in_bits,
6884                                 unsigned src_data_size_in_bits,
6885                                 LogicVRegister dst,
6886                                 const LogicPRegister& pg,
6887                                 const LogicVRegister& src,
6888                                 FPRounding round,
6889                                 int fbits) {
6890   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6891   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6892   dst.ClearForWrite(vform);
6893 
6894   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6895     if (!pg.IsActive(vform, i)) continue;
6896 
6897     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6898                                                0,
6899                                                src.Uint(vform, i));
6900 
6901     switch (dst_data_size_in_bits) {
6902       case kHRegSize: {
6903         SimFloat16 result = UFixedToFloat16(value, fbits, round);
6904         dst.SetUint(vform, i, Float16ToRawbits(result));
6905         break;
6906       }
6907       case kSRegSize: {
6908         float result = UFixedToFloat(value, fbits, round);
6909         dst.SetUint(vform, i, FloatToRawbits(result));
6910         break;
6911       }
6912       case kDRegSize: {
6913         double result = UFixedToDouble(value, fbits, round);
6914         dst.SetUint(vform, i, DoubleToRawbits(result));
6915         break;
6916       }
6917       default:
6918         VIXL_UNIMPLEMENTED();
6919         break;
6920     }
6921   }
6922 
6923   return dst;
6924 }
6925 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6926 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6927                                 LogicVRegister dst,
6928                                 const LogicVRegister& src,
6929                                 int fbits,
6930                                 FPRounding round) {
6931   return ucvtf(vform,
6932                LaneSizeInBitsFromFormat(vform),
6933                LaneSizeInBitsFromFormat(vform),
6934                dst,
6935                GetPTrue(),
6936                src,
6937                round,
6938                fbits);
6939 }
6940 
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6941 LogicVRegister Simulator::unpk(VectorFormat vform,
6942                                LogicVRegister dst,
6943                                const LogicVRegister& src,
6944                                UnpackType unpack_type,
6945                                ExtendType extend_type) {
6946   VectorFormat vform_half = VectorFormatHalfWidth(vform);
6947   const int lane_count = LaneCountFromFormat(vform);
6948   const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6949 
6950   switch (extend_type) {
6951     case kSignedExtend: {
6952       int64_t result[kZRegMaxSizeInBytes];
6953       for (int i = 0; i < lane_count; ++i) {
6954         result[i] = src.Int(vform_half, i + src_start_lane);
6955       }
6956       for (int i = 0; i < lane_count; ++i) {
6957         dst.SetInt(vform, i, result[i]);
6958       }
6959       break;
6960     }
6961     case kUnsignedExtend: {
6962       uint64_t result[kZRegMaxSizeInBytes];
6963       for (int i = 0; i < lane_count; ++i) {
6964         result[i] = src.Uint(vform_half, i + src_start_lane);
6965       }
6966       for (int i = 0; i < lane_count; ++i) {
6967         dst.SetUint(vform, i, result[i]);
6968       }
6969       break;
6970     }
6971     default:
6972       VIXL_UNREACHABLE();
6973   }
6974   return dst;
6975 }
6976 
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6977 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6978                                                      VectorFormat vform,
6979                                                      LogicPRegister dst,
6980                                                      const LogicPRegister& mask,
6981                                                      const LogicVRegister& src1,
6982                                                      const LogicVRegister& src2,
6983                                                      bool is_wide_elements,
6984                                                      FlagsUpdate flags) {
6985   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6986     bool result = false;
6987     if (mask.IsActive(vform, lane)) {
6988       int64_t op1 = 0xbadbeef;
6989       int64_t op2 = 0xbadbeef;
6990       int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6991       switch (cond) {
6992         case eq:
6993         case ge:
6994         case gt:
6995         case lt:
6996         case le:
6997         case ne:
6998           op1 = src1.Int(vform, lane);
6999           op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7000                                  : src2.Int(vform, lane);
7001           break;
7002         case hi:
7003         case hs:
7004         case ls:
7005         case lo:
7006           op1 = src1.Uint(vform, lane);
7007           op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7008                                  : src2.Uint(vform, lane);
7009           break;
7010         default:
7011           VIXL_UNREACHABLE();
7012       }
7013 
7014       switch (cond) {
7015         case eq:
7016           result = (op1 == op2);
7017           break;
7018         case ne:
7019           result = (op1 != op2);
7020           break;
7021         case ge:
7022           result = (op1 >= op2);
7023           break;
7024         case gt:
7025           result = (op1 > op2);
7026           break;
7027         case le:
7028           result = (op1 <= op2);
7029           break;
7030         case lt:
7031           result = (op1 < op2);
7032           break;
7033         case hs:
7034           result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7035           break;
7036         case hi:
7037           result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7038           break;
7039         case ls:
7040           result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7041           break;
7042         case lo:
7043           result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7044           break;
7045         default:
7046           VIXL_UNREACHABLE();
7047       }
7048     }
7049     dst.SetActive(vform, lane, result);
7050   }
7051 
7052   if (flags == SetFlags) PredTest(vform, mask, dst);
7053 
7054   return dst;
7055 }
7056 
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7057 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7058                                                 VectorFormat vform,
7059                                                 LogicVRegister dst,
7060                                                 const LogicVRegister& src1,
7061                                                 const LogicVRegister& src2,
7062                                                 bool is_wide_elements) {
7063   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7064   VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7065 
7066   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7067     int shift_src_lane = lane;
7068     if (is_wide_elements) {
7069       // If the shift amount comes from wide elements, select the D-sized lane
7070       // which occupies the corresponding lanes of the value to be shifted.
7071       shift_src_lane = (lane * lane_size) / kDRegSize;
7072     }
7073     uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7074 
7075     // Saturate shift_amount to the size of the lane that will be shifted.
7076     if (shift_amount > lane_size) shift_amount = lane_size;
7077 
7078     uint64_t value = src1.Uint(vform, lane);
7079     int64_t result = ShiftOperand(lane_size,
7080                                   value,
7081                                   shift_op,
7082                                   static_cast<unsigned>(shift_amount));
7083     dst.SetUint(vform, lane, result);
7084   }
7085 
7086   return dst;
7087 }
7088 
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7089 LogicVRegister Simulator::asrd(VectorFormat vform,
7090                                LogicVRegister dst,
7091                                const LogicVRegister& src1,
7092                                int shift) {
7093   VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7094                               LaneSizeInBitsFromFormat(vform)));
7095 
7096   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7097     int64_t value = src1.Int(vform, i);
7098     if (shift <= 63) {
7099       if (value < 0) {
7100         // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7101         // cast to int64_t, and cannot cause signed overflow in the result.
7102         value = value + GetUintMask(shift);
7103       }
7104       value = ShiftOperand(kDRegSize, value, ASR, shift);
7105     } else {
7106       value = 0;
7107     }
7108     dst.SetInt(vform, i, value);
7109   }
7110   return dst;
7111 }
7112 
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7113 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7114     LogicalOp logical_op,
7115     VectorFormat vform,
7116     LogicVRegister zd,
7117     const LogicVRegister& zn,
7118     const LogicVRegister& zm) {
7119   VIXL_ASSERT(IsSVEFormat(vform));
7120   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7121     uint64_t op1 = zn.Uint(vform, i);
7122     uint64_t op2 = zm.Uint(vform, i);
7123     uint64_t result = 0;
7124     switch (logical_op) {
7125       case AND:
7126         result = op1 & op2;
7127         break;
7128       case BIC:
7129         result = op1 & ~op2;
7130         break;
7131       case EOR:
7132         result = op1 ^ op2;
7133         break;
7134       case ORR:
7135         result = op1 | op2;
7136         break;
7137       default:
7138         VIXL_UNIMPLEMENTED();
7139     }
7140     zd.SetUint(vform, i, result);
7141   }
7142 
7143   return zd;
7144 }
7145 
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7146 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7147                                                     LogicPRegister pd,
7148                                                     const LogicPRegister& pn,
7149                                                     const LogicPRegister& pm) {
7150   for (int i = 0; i < pn.GetChunkCount(); i++) {
7151     LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7152     LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7153     LogicPRegister::ChunkType result = 0;
7154     switch (op) {
7155       case ANDS_p_p_pp_z:
7156       case AND_p_p_pp_z:
7157         result = op1 & op2;
7158         break;
7159       case BICS_p_p_pp_z:
7160       case BIC_p_p_pp_z:
7161         result = op1 & ~op2;
7162         break;
7163       case EORS_p_p_pp_z:
7164       case EOR_p_p_pp_z:
7165         result = op1 ^ op2;
7166         break;
7167       case NANDS_p_p_pp_z:
7168       case NAND_p_p_pp_z:
7169         result = ~(op1 & op2);
7170         break;
7171       case NORS_p_p_pp_z:
7172       case NOR_p_p_pp_z:
7173         result = ~(op1 | op2);
7174         break;
7175       case ORNS_p_p_pp_z:
7176       case ORN_p_p_pp_z:
7177         result = op1 | ~op2;
7178         break;
7179       case ORRS_p_p_pp_z:
7180       case ORR_p_p_pp_z:
7181         result = op1 | op2;
7182         break;
7183       default:
7184         VIXL_UNIMPLEMENTED();
7185     }
7186     pd.SetChunk(i, result);
7187   }
7188   return pd;
7189 }
7190 
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7191 LogicVRegister Simulator::SVEBitwiseImmHelper(
7192     SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7193     VectorFormat vform,
7194     LogicVRegister zd,
7195     uint64_t imm) {
7196   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7197     uint64_t op1 = zd.Uint(vform, i);
7198     uint64_t result = 0;
7199     switch (op) {
7200       case AND_z_zi:
7201         result = op1 & imm;
7202         break;
7203       case EOR_z_zi:
7204         result = op1 ^ imm;
7205         break;
7206       case ORR_z_zi:
7207         result = op1 | imm;
7208         break;
7209       default:
7210         VIXL_UNIMPLEMENTED();
7211     }
7212     zd.SetUint(vform, i, result);
7213   }
7214 
7215   return zd;
7216 }
7217 
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7218 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7219                                          const LogicPRegister& pg,
7220                                          unsigned zt_code,
7221                                          const LogicSVEAddressVector& addr) {
7222   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7223 
7224   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7225   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7226   int msize_in_bytes = addr.GetMsizeInBytes();
7227   int reg_count = addr.GetRegCount();
7228 
7229   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7230   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7231 
7232   unsigned zt_codes[4] = {zt_code,
7233                           (zt_code + 1) % kNumberOfZRegisters,
7234                           (zt_code + 2) % kNumberOfZRegisters,
7235                           (zt_code + 3) % kNumberOfZRegisters};
7236 
7237   LogicVRegister zt[4] = {
7238       ReadVRegister(zt_codes[0]),
7239       ReadVRegister(zt_codes[1]),
7240       ReadVRegister(zt_codes[2]),
7241       ReadVRegister(zt_codes[3]),
7242   };
7243 
7244   // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7245   // are ignored, so read the source register using the VectorFormat that
7246   // corresponds with the storage format, and multiply the index accordingly.
7247   VectorFormat unpack_vform =
7248       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7249   int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7250 
7251   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7252     if (!pg.IsActive(vform, i)) continue;
7253 
7254     for (int r = 0; r < reg_count; r++) {
7255       uint64_t element_address = addr.GetElementAddress(i, r);
7256       StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7257     }
7258   }
7259 
7260   if (ShouldTraceWrites()) {
7261     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7262     if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7263       // Use an FP format where it's likely that we're accessing FP data.
7264       format = GetPrintRegisterFormatTryFP(format);
7265     }
7266     // Stores don't represent a change to the source register's value, so only
7267     // print the relevant part of the value.
7268     format = GetPrintRegPartial(format);
7269 
7270     PrintZStructAccess(zt_code,
7271                        reg_count,
7272                        pg,
7273                        format,
7274                        msize_in_bytes,
7275                        "->",
7276                        addr);
7277   }
7278 }
7279 
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7280 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7281                                         const LogicPRegister& pg,
7282                                         unsigned zt_code,
7283                                         const LogicSVEAddressVector& addr,
7284                                         bool is_signed) {
7285   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7286   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7287   int msize_in_bytes = addr.GetMsizeInBytes();
7288   int reg_count = addr.GetRegCount();
7289 
7290   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7291   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7292   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7293 
7294   unsigned zt_codes[4] = {zt_code,
7295                           (zt_code + 1) % kNumberOfZRegisters,
7296                           (zt_code + 2) % kNumberOfZRegisters,
7297                           (zt_code + 3) % kNumberOfZRegisters};
7298   LogicVRegister zt[4] = {
7299       ReadVRegister(zt_codes[0]),
7300       ReadVRegister(zt_codes[1]),
7301       ReadVRegister(zt_codes[2]),
7302       ReadVRegister(zt_codes[3]),
7303   };
7304 
7305   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7306     for (int r = 0; r < reg_count; r++) {
7307       uint64_t element_address = addr.GetElementAddress(i, r);
7308 
7309       if (!pg.IsActive(vform, i)) {
7310         zt[r].SetUint(vform, i, 0);
7311         continue;
7312       }
7313 
7314       if (is_signed) {
7315         LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7316       } else {
7317         LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7318       }
7319     }
7320   }
7321 
7322   if (ShouldTraceVRegs()) {
7323     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7324     if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7325       // Use an FP format where it's likely that we're accessing FP data.
7326       format = GetPrintRegisterFormatTryFP(format);
7327     }
7328     PrintZStructAccess(zt_code,
7329                        reg_count,
7330                        pg,
7331                        format,
7332                        msize_in_bytes,
7333                        "<-",
7334                        addr);
7335   }
7336 }
7337 
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7338 LogicPRegister Simulator::brka(LogicPRegister pd,
7339                                const LogicPRegister& pg,
7340                                const LogicPRegister& pn) {
7341   bool break_ = false;
7342   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7343     if (pg.IsActive(kFormatVnB, i)) {
7344       pd.SetActive(kFormatVnB, i, !break_);
7345       break_ |= pn.IsActive(kFormatVnB, i);
7346     }
7347   }
7348 
7349   return pd;
7350 }
7351 
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7352 LogicPRegister Simulator::brkb(LogicPRegister pd,
7353                                const LogicPRegister& pg,
7354                                const LogicPRegister& pn) {
7355   bool break_ = false;
7356   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7357     if (pg.IsActive(kFormatVnB, i)) {
7358       break_ |= pn.IsActive(kFormatVnB, i);
7359       pd.SetActive(kFormatVnB, i, !break_);
7360     }
7361   }
7362 
7363   return pd;
7364 }
7365 
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7366 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7367                                const LogicPRegister& pg,
7368                                const LogicPRegister& pn) {
7369   if (!IsLastActive(kFormatVnB, pg, pn)) {
7370     pfalse(pdm);
7371   }
7372   return pdm;
7373 }
7374 
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7375 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7376                                 const LogicPRegister& pg,
7377                                 const LogicPRegister& pn,
7378                                 const LogicPRegister& pm) {
7379   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7380 
7381   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7382     bool active = false;
7383     if (pg.IsActive(kFormatVnB, i)) {
7384       active = last_active;
7385       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7386     }
7387     pd.SetActive(kFormatVnB, i, active);
7388   }
7389 
7390   return pd;
7391 }
7392 
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7393 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7394                                 const LogicPRegister& pg,
7395                                 const LogicPRegister& pn,
7396                                 const LogicPRegister& pm) {
7397   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7398 
7399   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7400     bool active = false;
7401     if (pg.IsActive(kFormatVnB, i)) {
7402       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7403       active = last_active;
7404     }
7405     pd.SetActive(kFormatVnB, i, active);
7406   }
7407 
7408   return pd;
7409 }
7410 
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7411 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7412                                            const LogicPRegister& pg,
7413                                            unsigned zt_code,
7414                                            const LogicSVEAddressVector& addr,
7415                                            SVEFaultTolerantLoadType type,
7416                                            bool is_signed) {
7417   int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7418   int msize_in_bits = addr.GetMsizeInBits();
7419   int msize_in_bytes = addr.GetMsizeInBytes();
7420 
7421   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7422   VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7423   VIXL_ASSERT(addr.GetRegCount() == 1);
7424 
7425   LogicVRegister zt = ReadVRegister(zt_code);
7426   LogicPRegister ffr = ReadFFR();
7427 
7428   // Non-faulting loads are allowed to fail arbitrarily. To stress user
7429   // code, fail a random element in roughly one in eight full-vector loads.
7430   uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7431   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7432 
7433   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7434     uint64_t value = 0;
7435 
7436     if (pg.IsActive(vform, i)) {
7437       uint64_t element_address = addr.GetElementAddress(i, 0);
7438 
7439       if (type == kSVEFirstFaultLoad) {
7440         // First-faulting loads always load the first active element, regardless
7441         // of FFR. The result will be discarded if its FFR lane is inactive, but
7442         // it could still generate a fault.
7443         value = MemReadUint(msize_in_bytes, element_address);
7444         // All subsequent elements have non-fault semantics.
7445         type = kSVENonFaultLoad;
7446 
7447       } else if (ffr.IsActive(vform, i)) {
7448         // Simulation of fault-tolerant loads relies on system calls, and is
7449         // likely to be relatively slow, so we only actually perform the load if
7450         // its FFR lane is active.
7451 
7452         bool can_read = (i < fake_fault_at_lane) &&
7453                         CanReadMemory(element_address, msize_in_bytes);
7454         if (can_read) {
7455           value = MemReadUint(msize_in_bytes, element_address);
7456         } else {
7457           // Propagate the fault to the end of FFR.
7458           for (int j = i; j < LaneCountFromFormat(vform); j++) {
7459             ffr.SetActive(vform, j, false);
7460           }
7461         }
7462       }
7463     }
7464 
7465     // The architecture permits a few possible results for inactive FFR lanes
7466     // (including those caused by a fault in this instruction). We choose to
7467     // leave the register value unchanged (like merging predication) because
7468     // no other input to this instruction can have the same behaviour.
7469     //
7470     // Note that this behaviour takes precedence over pg's zeroing predication.
7471 
7472     if (ffr.IsActive(vform, i)) {
7473       int msb = msize_in_bits - 1;
7474       if (is_signed) {
7475         zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7476       } else {
7477         zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7478       }
7479     }
7480   }
7481 
7482   if (ShouldTraceVRegs()) {
7483     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7484     if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7485       // Use an FP format where it's likely that we're accessing FP data.
7486       format = GetPrintRegisterFormatTryFP(format);
7487     }
7488     // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7489     // expects a single mask, so combine the two predicates.
7490     SimPRegister mask;
7491     SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7492     PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7493   }
7494 }
7495 
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7496 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7497                                                     VectorFormat vform,
7498                                                     SVEOffsetModifier mod) {
7499   bool is_signed = instr->ExtractBit(14) == 0;
7500   bool is_ff = instr->ExtractBit(13) == 1;
7501   // Note that these instructions don't use the Dtype encoding.
7502   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7503   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7504   uint64_t base = ReadXRegister(instr->GetRn());
7505   LogicSVEAddressVector addr(base,
7506                              &ReadVRegister(instr->GetRm()),
7507                              vform,
7508                              mod,
7509                              scale);
7510   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7511   if (is_ff) {
7512     SVEFaultTolerantLoadHelper(vform,
7513                                ReadPRegister(instr->GetPgLow8()),
7514                                instr->GetRt(),
7515                                addr,
7516                                kSVEFirstFaultLoad,
7517                                is_signed);
7518   } else {
7519     SVEStructuredLoadHelper(vform,
7520                             ReadPRegister(instr->GetPgLow8()),
7521                             instr->GetRt(),
7522                             addr,
7523                             is_signed);
7524   }
7525 }
7526 
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7527 int Simulator::GetFirstActive(VectorFormat vform,
7528                               const LogicPRegister& pg) const {
7529   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7530     if (pg.IsActive(vform, i)) return i;
7531   }
7532   return -1;
7533 }
7534 
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7535 int Simulator::GetLastActive(VectorFormat vform,
7536                              const LogicPRegister& pg) const {
7537   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7538     if (pg.IsActive(vform, i)) return i;
7539   }
7540   return -1;
7541 }
7542 
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7543 int Simulator::CountActiveLanes(VectorFormat vform,
7544                                 const LogicPRegister& pg) const {
7545   int count = 0;
7546   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7547     count += pg.IsActive(vform, i) ? 1 : 0;
7548   }
7549   return count;
7550 }
7551 
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7552 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7553                                        const LogicPRegister& pg,
7554                                        const LogicPRegister& pn) const {
7555   int count = 0;
7556   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7557     count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7558   }
7559   return count;
7560 }
7561 
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7562 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7563                                                int pattern) const {
7564   VIXL_ASSERT(IsSVEFormat(vform));
7565   int all = LaneCountFromFormat(vform);
7566   VIXL_ASSERT(all > 0);
7567 
7568   switch (pattern) {
7569     case SVE_VL1:
7570     case SVE_VL2:
7571     case SVE_VL3:
7572     case SVE_VL4:
7573     case SVE_VL5:
7574     case SVE_VL6:
7575     case SVE_VL7:
7576     case SVE_VL8:
7577       // VL1-VL8 are encoded directly.
7578       VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7579       VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7580       return (pattern <= all) ? pattern : 0;
7581     case SVE_VL16:
7582     case SVE_VL32:
7583     case SVE_VL64:
7584     case SVE_VL128:
7585     case SVE_VL256: {
7586       // VL16-VL256 are encoded as log2(N) + c.
7587       int min = 16 << (pattern - SVE_VL16);
7588       return (min <= all) ? min : 0;
7589     }
7590     // Special cases.
7591     case SVE_POW2:
7592       return 1 << HighestSetBitPosition(all);
7593     case SVE_MUL4:
7594       return all - (all % 4);
7595     case SVE_MUL3:
7596       return all - (all % 3);
7597     case SVE_ALL:
7598       return all;
7599   }
7600   // Unnamed cases architecturally return 0.
7601   return 0;
7602 }
7603 
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7604 LogicPRegister Simulator::match(VectorFormat vform,
7605                                 LogicPRegister dst,
7606                                 const LogicVRegister& haystack,
7607                                 const LogicVRegister& needles,
7608                                 bool negate_match) {
7609   SimVRegister ztemp;
7610   SimPRegister ptemp;
7611 
7612   pfalse(dst);
7613   int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7614   for (int i = 0; i < lanes_per_segment; i++) {
7615     dup_elements_to_segments(vform, ztemp, needles, i);
7616     SVEIntCompareVectorsHelper(eq,
7617                                vform,
7618                                ptemp,
7619                                GetPTrue(),
7620                                haystack,
7621                                ztemp,
7622                                false,
7623                                LeaveFlags);
7624     SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7625   }
7626   if (negate_match) {
7627     ptrue(vform, ptemp, SVE_ALL);
7628     SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7629   }
7630   return dst;
7631 }
7632 
GetStructAddress(int lane) const7633 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7634   if (IsContiguous()) {
7635     return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7636   }
7637 
7638   VIXL_ASSERT(IsScatterGather());
7639   VIXL_ASSERT(vector_ != NULL);
7640 
7641   // For scatter-gather accesses, we need to extract the offset from vector_,
7642   // and apply modifiers.
7643 
7644   uint64_t offset = 0;
7645   switch (vector_form_) {
7646     case kFormatVnS:
7647       offset = vector_->GetLane<uint32_t>(lane);
7648       break;
7649     case kFormatVnD:
7650       offset = vector_->GetLane<uint64_t>(lane);
7651       break;
7652     default:
7653       VIXL_UNIMPLEMENTED();
7654       break;
7655   }
7656 
7657   switch (vector_mod_) {
7658     case SVE_MUL_VL:
7659       VIXL_UNIMPLEMENTED();
7660       break;
7661     case SVE_LSL:
7662       // We apply the shift below. There's nothing to do here.
7663       break;
7664     case NO_SVE_OFFSET_MODIFIER:
7665       VIXL_ASSERT(vector_shift_ == 0);
7666       break;
7667     case SVE_UXTW:
7668       offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7669       break;
7670     case SVE_SXTW:
7671       offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7672       break;
7673   }
7674 
7675   return base_ + (offset << vector_shift_);
7676 }
7677 
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7678 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7679                                             LogicVRegister dst,
7680                                             const LogicVRegister& src) {
7681   SimVRegister zero;
7682   zero.Clear();
7683   return uzp2(vform, dst, src, zero);
7684 }
7685 
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7686 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7687                                              LogicVRegister dst,
7688                                              const LogicVRegister& src) {
7689   SimVRegister zero;
7690   zero.Clear();
7691   return uzp1(vform, dst, src, zero);
7692 }
7693 
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7694 LogicVRegister Simulator::adcl(VectorFormat vform,
7695                                LogicVRegister dst,
7696                                const LogicVRegister& src1,
7697                                const LogicVRegister& src2,
7698                                bool top) {
7699   unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7700   VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7701 
7702   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7703     uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7704     uint64_t right = dst.Uint(vform, i);
7705     unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7706     std::pair<uint64_t, uint8_t> val_and_flags =
7707         AddWithCarry(reg_size, left, right, carry_in);
7708 
7709     // Set even lanes to the result of the addition.
7710     dst.SetUint(vform, i, val_and_flags.first);
7711 
7712     // Set odd lanes to the carry flag from the addition.
7713     uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7714     dst.SetUint(vform, i + 1, carry_out);
7715   }
7716   return dst;
7717 }
7718 
7719 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7720 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7721 //
7722 // Matrices of the form:
7723 //
7724 //  src1 = ( a b c d e f g h )  src2 = ( A B )
7725 //         ( i j k l m n o p )         ( C D )
7726 //                                     ( E F )
7727 //                                     ( G H )
7728 //                                     ( I J )
7729 //                                     ( K L )
7730 //                                     ( M N )
7731 //                                     ( O P )
7732 //
7733 // Are stored in the input vector registers as:
7734 //
7735 //           15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
7736 //  src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7737 //  src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7738 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7739 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7740                                  LogicVRegister srcdst,
7741                                  const LogicVRegister& src1,
7742                                  const LogicVRegister& src2,
7743                                  bool src1_signed,
7744                                  bool src2_signed) {
7745   // Two destination forms are supported: Q register containing four S-sized
7746   // elements (4S) and Z register containing n S-sized elements (VnS).
7747   VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7748   VectorFormat vform_src = kFormatVnB;
7749   int b_per_segment = kQRegSize / kBRegSize;
7750   int s_per_segment = kQRegSize / kSRegSize;
7751   int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7752   int segment_count = LaneCountFromFormat(vform_dst) / 4;
7753   for (int seg = 0; seg < segment_count; seg++) {
7754     for (int i = 0; i < 2; i++) {
7755       for (int j = 0; j < 2; j++) {
7756         int dstidx = (2 * i) + j + (seg * s_per_segment);
7757         int64_t sum = srcdst.Int(vform_dst, dstidx);
7758         for (int k = 0; k < 8; k++) {
7759           int idx1 = (8 * i) + k + (seg * b_per_segment);
7760           int idx2 = (8 * j) + k + (seg * b_per_segment);
7761           int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7762                                    : src1.Uint(vform_src, idx1);
7763           int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7764                                    : src2.Uint(vform_src, idx2);
7765           sum += e1 * e2;
7766         }
7767         result[dstidx] = sum;
7768       }
7769     }
7770   }
7771   srcdst.SetIntArray(vform_dst, result);
7772   return srcdst;
7773 }
7774 
7775 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7776 // result to the matrix in srcdst, and write back to srcdst.
7777 //
7778 // Matrices of the form:
7779 //
7780 //  src1 = ( a b )  src2 = ( A B )
7781 //         ( c d )         ( C D )
7782 //
7783 // Are stored in the input vector registers as:
7784 //
7785 //           3   2   1   0
7786 //  src1 = [ d | c | b | a ]
7787 //  src2 = [ D | B | C | A ]
7788 //
7789 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7790 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7791                                   LogicVRegister srcdst,
7792                                   const LogicVRegister& src1,
7793                                   const LogicVRegister& src2) {
7794   T result[kZRegMaxSizeInBytes / sizeof(T)];
7795   int T_per_segment = 4;
7796   int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7797   for (int seg = 0; seg < segment_count; seg++) {
7798     int segoff = seg * T_per_segment;
7799     for (int i = 0; i < 2; i++) {
7800       for (int j = 0; j < 2; j++) {
7801         T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7802                             src2.Float<T>(2 * j + 0 + segoff));
7803         T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7804                             src2.Float<T>(2 * j + 1 + segoff));
7805         T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7806         result[2 * i + j + segoff] = FPAdd(sum, prod1);
7807       }
7808     }
7809   }
7810   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7811     // Elements outside a multiple of 4T are set to zero. This happens only
7812     // for double precision operations, when the VL is a multiple of 128 bits,
7813     // but not a mutiple of 256 bits.
7814     T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7815     srcdst.SetFloat<T>(vform, i, value);
7816   }
7817   return srcdst;
7818 }
7819 
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7820 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7821                                   LogicVRegister dst,
7822                                   const LogicVRegister& src1,
7823                                   const LogicVRegister& src2) {
7824   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7825     fmatmul<float>(vform, dst, src1, src2);
7826   } else {
7827     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7828     fmatmul<double>(vform, dst, src1, src2);
7829   }
7830   return dst;
7831 }
7832 
7833 }  // namespace aarch64
7834 }  // namespace vixl
7835 
7836 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
7837